diff --git a/.travis.yml b/.asf.yaml
similarity index 63%
rename from .travis.yml
rename to .asf.yaml
index bfc6b34bbf..776f8a6f7c 100644
--- a/.travis.yml
+++ b/.asf.yaml
@@ -14,18 +14,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-language: java
-
-sudo: required
-
-env: MAVEN_OPTS="-Xmx2G -XX:MaxPermSize=512M"
-
-jdk:
-  - oraclejdk8
-
-before_install:
-  - ./build-tools/install-protobuf.sh
-
-script:
-  - mvn -B clean install package -DskipTests=true -Dmaven.javadoc.skip=true
-
+github:
+  description: "Apache Tez"
+  homepage: https://tez.apache.org/
+  labels:
+    - tez
+    - java
+    - apache
+    - big-data
+    - hadoop
+  features:
+    wiki: false
+    issues: false
+    projects: false
+  enabled_merge_buttons:
+    squash: true
+    merge: false
+    rebase: false
+notifications:
+  commits: commits@tez.apache.org
+  issues: issues@tez.apache.org
+  pullrequests: issues@tez.apache.org
+  jira_options: link label worklog
diff --git a/build-tools/install-protobuf.sh b/.github/workflows/build.yml
old mode 100755
new mode 100644
similarity index 60%
rename from build-tools/install-protobuf.sh
rename to .github/workflows/build.yml
index 902049dab1..f81c1ad8b1
--- a/build-tools/install-protobuf.sh
+++ b/.github/workflows/build.yml
@@ -1,5 +1,3 @@
-#!/bin/sh
-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -15,8 +13,26 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+name: Build CI with different platforms/configs
+
+on:
+  push:
+    branches:
+      - 'master'
+  pull_request:
+    branches:
+      - 'master'
 
-set -ex
-wget https://github.com/google/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz
-tar -xzvf protobuf-2.5.0.tar.gz
-cd protobuf-2.5.0 && ./configure --prefix=/usr && make && sudo make install
+jobs:
+  build:
+    strategy:
+      matrix:
+        java-version: [8, 11, 17]
+        os: [ubuntu-latest, macos-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-java@v1
+        with:
+          java-version: ${{ matrix.java-version }}
+      - run: mvn clean install -DskipTests -Dmaven.javadoc.skip=true
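
The workflow above runs one plain Maven build per matrix cell. A local
equivalent of a single cell is sketched below; the JAVA_HOME path is an
assumption for an Ubuntu-style JDK layout, since in CI the JDK is provisioned
by actions/setup-java:

  # reproduce one matrix cell by hand: Java 11 on Linux (path is illustrative)
  $ export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
  $ mvn clean install -DskipTests -Dmaven.javadoc.skip=true
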
diff --git a/.gitignore b/.gitignore
index 01c99576d6..85d660672c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 *.ipr
 *.iws
 *.DS_Store
+*.pyc
 .idea
 .svn
 .classpath
diff --git a/BUILDING.txt b/BUILDING.txt
index 34ed2fbff1..57c9cd13c0 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -6,10 +6,10 @@
 ----------------------------------------------------------------------------------
 Requirements:
 
-* JDK 1.7+
-* Maven 3.1 or later
+* JDK 1.8+
+* Maven 3.6.3 or later
 * Findbugs 2.0.2 or later (if running findbugs)
-* ProtocolBuffer 2.5.0
+* ProtocolBuffer 3.21.1
 * Internet connection for first build (to fetch all dependencies)
 * Hadoop version should be 2.7.0 or higher.
 
@@ -52,6 +52,7 @@
  * Use -Dclover.license to specify the path to the clover license file
  * Use -Dhadoop.version to specify the version of hadoop to build tez against
  * Use -Dprotoc.path to specify the path to protoc
+ * Use -Dallow.root.build to allow building the tez-ui components as the root user
 
 Tests options:
 
@@ -103,16 +104,29 @@
 Issue with PhantomJS on building in PowerPC: please try installing PhantomJS
 manually and rerun. Refer to
 https://github.com/ibmsoe/phantomjs-1/blob/v2.1.1-ppc64/README.md
 and install it globally for the build to work.
 
+----------------------------------------------------------------------------------
+Skip UI build:
+
+In case you want to completely skip the UI build, you can use the 'noui' profile.
+For instance, a full build without tests and tez-ui looks like:
+
+  $ mvn clean install -DskipTests -Pnoui
+
+It's important to note that Maven will still include the tez-ui project, but all
+of the Maven plugins are skipped.
+
 ----------------------------------------------------------------------------------
 Protocol Buffer compiler:
 
-The version of Protocol Buffer compiler, protoc, must be 2.5.0 and match the
-version of the protobuf JAR.
+The version of the Protocol Buffer compiler, protoc, can be defined on-the-fly as:
+  $ mvn clean install -DskipTests -pl ./tez-api -Dprotobuf.version=3.7.1
+
+The default version is defined in the root pom.xml.
 
-If you have multiple versions of protoc in your system, you can set in your
-build shell the PROTOC_PATH environment variable to point to the one you
-want to use for the Tez build. If you don't define this environment variable,
-protoc is looked up in the PATH.
+If you have multiple versions of protoc in your system, you can set in your
+build shell the PROTOC_PATH environment variable to point to the one you
+want to use for the Tez build. If you don't define this environment variable,
+then the embedded protoc compiler will be used with the version defined in
+${protobuf.version}. It detects the platform and executes the corresponding
+protoc binary at build time.
 
 You can also specify the path to protoc while building using -Dprotoc.path
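
The build options listed in BUILDING.txt above compose into a single Maven
invocation; for example (the Hadoop version and protoc path here are
illustrative, not part of this change):

  # skip tests and the UI build, pin Hadoop, and point at an external protoc
  $ mvn clean install -DskipTests -Pnoui \
      -Dhadoop.version=3.3.6 \
      -Dprotoc.path=/usr/local/bin/protoc
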
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 0000000000..4ce236598d
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,215 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+pipeline {
+
+  agent {
+    label 'Hadoop'
+  }
+
+  options {
+    buildDiscarder(logRotator(numToKeepStr: '5'))
+    timeout (time: 20, unit: 'HOURS')
+    timestamps()
+    checkoutToSubdirectory('src')
+  }
+
+  environment {
+    SOURCEDIR = 'src'
+    // will also need to change notification section below
+    PATCHDIR = 'out'
+    DOCKERFILE = "${SOURCEDIR}/build-tools/docker/Dockerfile"
+    YETUS='yetus'
+    // Branch or tag name.  Yetus release tags are 'rel/X.Y.Z'
+    YETUS_VERSION='rel/0.12.0'
+  }
+
+  parameters {
+    string(name: 'JIRA_ISSUE_KEY',
+           defaultValue: '',
+           description: 'The JIRA issue that has a patch needing pre-commit testing. Example: HADOOP-1234')
+  }
+
+  stages {
+    stage ('install yetus') {
+      steps {
+        dir("${WORKSPACE}/${YETUS}") {
+          checkout([
+            $class: 'GitSCM',
+            branches: [[name: "${env.YETUS_VERSION}"]],
+            userRemoteConfigs: [[ url: 'https://github.com/apache/yetus']]]
+          )
+        }
+      }
+    }
+
+    stage ('precommit-run') {
+      steps {
+        withCredentials(
+          [usernamePassword(credentialsId: 'apache-tez-at-github.com',
+                            passwordVariable: 'GITHUB_TOKEN',
+                            usernameVariable: 'GITHUB_USER'),
+           usernamePassword(credentialsId: 'tez-ci',
+                            passwordVariable: 'JIRA_PASSWORD',
+                            usernameVariable: 'JIRA_USER')]) {
+          sh '''#!/usr/bin/env bash
+
+          set -e
+
+          TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/test-patch.sh"
+
+          # this must be clean for every run
+          if [[ -d "${WORKSPACE}/${PATCHDIR}" ]]; then
+            rm -rf "${WORKSPACE}/${PATCHDIR}"
+          fi
+          mkdir -p "${WORKSPACE}/${PATCHDIR}"
+
+          # if given a JIRA issue, process it. If CHANGE_URL is set
+          # (e.g., Github Branch Source plugin), process it.
+          # otherwise exit, because we don't want Hadoop to do a
+          # full build.  We wouldn't normally do this check for smaller
+          # projects. :)
+          if [[ -n "${JIRA_ISSUE_KEY}" ]]; then
+            YETUS_ARGS+=("${JIRA_ISSUE_KEY}")
+          elif [[ -z "${CHANGE_URL}" ]]; then
+            echo "Full build skipped" > "${WORKSPACE}/${PATCHDIR}/report.html"
+            exit 0
+          fi
+
+          YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}")
+
+          # where the source is located
+          YETUS_ARGS+=("--basedir=${WORKSPACE}/${SOURCEDIR}")
+
+          # our project defaults come from a personality file
+          YETUS_ARGS+=("--project=tez")
+
+          # lots of different output formats
+          YETUS_ARGS+=("--brief-report-file=${WORKSPACE}/${PATCHDIR}/brief.txt")
+          YETUS_ARGS+=("--console-report-file=${WORKSPACE}/${PATCHDIR}/console.txt")
+          YETUS_ARGS+=("--html-report-file=${WORKSPACE}/${PATCHDIR}/report.html")
+
+          # enable writing back to Github
+          YETUS_ARGS+=(--github-user="${GITHUB_USER}")
+          YETUS_ARGS+=(--github-token="${GITHUB_TOKEN}")
+
+          # auto-kill any surefire stragglers during unit test runs
+          YETUS_ARGS+=("--reapermode=kill")
+
+          # set relatively high limits for ASF machines
+          # changing these to higher values may cause problems
+          # with other jobs on systemd-enabled machines
+          YETUS_ARGS+=("--proclimit=5500")
+          YETUS_ARGS+=("--dockermemlimit=20g")
+
+          # -1 findbugs issues that show up prior to the patch being applied
+          # YETUS_ARGS+=("--findbugs-strict-precheck")
+
+          # rsync these files back into the archive dir
+          YETUS_ARGS+=("--archive-list=checkstyle-errors.xml,findbugsXml.xml")
+
+          # URL for user-side presentation in reports and such to our artifacts
+          # (needs to match the archive bits below)
+          YETUS_ARGS+=("--build-url-artifacts=artifact/out")
+
+          # plugins to enable
+          YETUS_ARGS+=("--plugins=all")
+
+          # use Hadoop's bundled shelldocs
+          YETUS_ARGS+=("--shelldocs=${WORKSPACE}/${SOURCEDIR}/dev-support/bin/shelldocs")
+
+          # don't let these tests cause -1s because we aren't really paying that
+          # much attention to them
+          YETUS_ARGS+=("--tests-filter=checkstyle")
+
+          # run in docker mode and specifically point to our
+          # Dockerfile since we don't want to use the auto-pulled version.
+ YETUS_ARGS+=("--docker") + YETUS_ARGS+=("--dockerfile=${DOCKERFILE}") + YETUS_ARGS+=("--mvn-custom-repos") + + # effectively treat dev-suport as a custom maven module + YETUS_ARGS+=("--skip-dirs=dev-support") + + # help keep the ASF boxes clean + YETUS_ARGS+=("--sentinel") + + # use emoji vote so it is easier to find the broken line + YETUS_ARGS+=("--github-use-emoji-vote") + + # test with Java 8 and 11 + YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64") + YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64") + YETUS_ARGS+=("--multijdktests=compile") + YETUS_ARGS+=("--debug") + + "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" + ''' + } + } + } + + } + + post { + always { + script { + // Yetus output + archiveArtifacts "${env.PATCHDIR}/**" + // Publish the HTML report so that it can be looked at + // Has to be relative to WORKSPACE. + publishHTML (target: [ + allowMissing: true, + keepAll: true, + alwaysLinkToLastBuild: true, + // Has to be relative to WORKSPACE + reportDir: "${env.PATCHDIR}", + reportFiles: 'report.html', + reportName: 'Yetus Report' + ]) + // Publish JUnit results + try { + junit "${env.SOURCEDIR}/**/target/surefire-reports/*.xml" + } catch(e) { + echo 'junit processing: ' + e.toString() + } + } + } + + // Jenkins pipeline jobs fill slaves on PRs without this :( + cleanup() { + script { + sh ''' + # See YETUS-764 + if [ -f "${WORKSPACE}/${PATCHDIR}/pidfile.txt" ]; then + echo "test-patch process appears to still be running: killing" + kill `cat "${WORKSPACE}/${PATCHDIR}/pidfile.txt"` || true + sleep 10 + fi + if [ -f "${WORKSPACE}/${PATCHDIR}/cidfile.txt" ]; then + echo "test-patch container appears to still be running: killing" + docker kill `cat "${WORKSPACE}/${PATCHDIR}/cidfile.txt"` || true + fi + # See HADOOP-13951 + chmod -R u+rxw "${WORKSPACE}" + ''' + deleteDir() + } + } + } +} \ No newline at end of file diff --git a/KEYS b/KEYS deleted file mode 100644 index 6485099611..0000000000 --- a/KEYS +++ /dev/null @@ -1,417 +0,0 @@ -pub 4096R/F0B000F0 2013-06-16 - Key fingerprint = EB34 498A 9261 F343 F09F 60E0 A951 0905 F0B0 00F0 -uid Hitesh Shah -sub 4096R/A64C68A3 2013-06-16 - ------BEGIN PGP PUBLIC KEY BLOCK----- -Version: GnuPG v2.0.19 (Darwin) - -mQINBFG+GRYBEAC1zVfPQgrBkcxjkSNMkL6eWy3mMk+VwEAss17QQN/5HQSPQ247 -KiBPcAMEEhRpwowOeUiYIcTTGBEMo8pMYfv5Sh6+5ZmrgXGf8S8HyHdmvkzrxd6x -RuH716uu2Gg4RLpf2V5V17LS6SRUBCoFHjDBX/PCKBxvZzOevnwZm6Y79pfb6tOz -wmPC9URjz/T4eeAkUrGbcMtuScLzkEuUcOxLg+7qSB/FLgVAWK/LPd6mSvmGsk2o -WdNxnZqgK7EyYJRl44t9Ko18ux/Xriz37iEqI4DAovQnTibvK1oJnvNNFUUxPbTt -7I0IITH2suwsQSllMj9VVwPbtiJ1B/aB5DWtcHbpv79BjIkJYZIbJBoGoOrpC5XE -B/Ms6BnCkpx3GWLrZdzMwNV1KPufQWUzoNT77Sllr2cR3HiieC06g9h2mVeVNTAq -dao9MeFyo0PmJF6y4BvzGcTw9TxUysoS4NUzani3OfNE4qgc/HrvHoiEuoJ7++7P -60OP6OtKhPLKToA5VUF+I3qwIlymPhtTg0kaz4aJL7p1kjAQ1YIiISbWvAZyi9KQ -yyvisIsj9VR8r5gPCls1kB4Uh1HDeLzqCYnpxnhA2ktj6j0dnON7kP7cUcAsL4+d -MMi0BIm5oxh4pd8W39+0dOMTqgotpeQ7dLgXdsZWZyaQYAXiuYClyuytSwARAQAB -tB9IaXRlc2ggU2hhaCA8aGl0ZXNoQGFwYWNoZS5vcmc+iQI3BBMBCgAhBQJRvhkW -AhsDBQsJCAcDBRUKCQgLBRYCAwEAAh4BAheAAAoJEKlRCQXwsADw1psP+gO1Guua -B+uOhRvN9DgPLZB7rRy64eIlhnrRmu3Qun3CL/hSoI8CxcgjxcbvLsDwVqE1+cn+ -JQzSoygJDSSczgZW7zgly/bzSG1ebwBDolcc1NvPzRgby48UeOgf0T1IU3jOzvJn -6aWONgGg0ZKFYnq11ePZ/QGVaSZ4FLg8BjqzGAUrF6JKBbv7bwCMwMzBW++pK7b0 -WuSaHhCSlasiht4PPG2AYr88jm8xo5GKgVgSVkRrdcUKmcrrvToWh5e+eLXgVVpm -wJxbqt3fjZKiDt0q9nuFkCiEAUsvv1tjahhb9WduXqEPiRmOaPugVp4jjQxeyGCO -tqeRcVeJ0fCqn+Ode1tpj3wQEW9bpKCT2zsh3ZRbJofBFAnTOHsu1zDZUc7MDnUt -xr9P44F7qAGktigZ8djorIbTQqUHaU7r4GWum+gA29MUMTSbZ0rP0hj69vpQw7uW 
-1OAzW7xCbHRzQduyu+WV3V6x1UWH8A6yKVlMdvZq59e2cLy904BDQf1gHr0JuANF -s77RUAlOO9m45+X2w16ny6p2iLqPAxuSXhUYWB595hOGpc1Y7wDPJem1HLX4vmN8 -EBQkKQy8xc6g3RhDO2eRIEOIUAT33PL5+a2YEAhCxkcYAvcaZzUqcchaRLNs3iTo -MmO6ojoC9p2dpn/IXxGjBxx7AlH0/ysCx4y3uQINBFG+GRYBEADW2h+4cAbUbPO5 -NCkU6KRuKghdTz9leUl8+pvzZ5ul9jXmsL50Ay70dYhdCaRNUSl2b+YAp2UYZCeb -M9ZbJ2hfIe2jbNks+baJGfjXuKcqD9F+no5z0lXHSeG8Gllx12863ADsIMgIXzvY -kSU2S4MVTi4IloNwsRyjjqK+NzxtvvqOHKOpfiqTpmQNTc5LttqmiuyIgCmLh4Xr -GJZ+dhyLHJ5dJu0TcHsQyRE2kVFOEAExh/7lXg1Y6Hobg2qLbfmd1hqyUiQEDi5f -awZtLhilMmnbV1yGlZxXfcfQuXP1tbiGWji1/FawI11EvL9dChALphp90J2irx0f -pfpH6xDvKv1dOdttv3Y1+q4oixZYJ+OxNZ+wW634E4D3K5Yi69ty166+ArfFmmIG -5+1icsT1zh1d2PGsqoTTMxlggZubwmHATed1MdYHpk1egktVBcUgiAp2LNfy7qMN -lTx2XQ0DnptURBZijPkshfXzX7ur1www9pa8TuIRgjnO9zzgTq89vH4DdYSRnuAp -8SmyXnmGxT7qR/r0nigTTn5TL+q+wq+hVDBz6gTXLFGJcmMi6DR854ry6KPPP7jD -+o66Ne5iKyBRtpWEOrm9prgGyw7uFIh89QILWmoMDgkrGaiovjnlHpi2VbXp81tr -tMk343NHGWFkT4xifZ8gwkGb8ZzDQQARAQABiQIfBBgBCgAJBQJRvhkWAhsMAAoJ -EKlRCQXwsADwT/kP/1H2UllFpNNPCqfI0yQT1ndPVSKXXSUANLrhVJ0Xb+1+TXHY -8tLRoqWvccwArUeXJ67ehUo9dU7yvbAHtaUXOlumbtkhj+NKbqRbI6r1nY5bdZ4E -dUq1jxwK4NrJZyWI3ZLJuzgdiPefp+Keg+S7y5PnQadR8m2EudLX5nTW6OSaoUsc -Q9LVs8j1zp3dd4tUNZykWsMcl3LdveY5YisoKWpV9Kw8OClPYk9VHaUB2gevFMnK -RaZvWrur1IGQq6AH9fjc0EmjAsG9bHwxzjkGXvvDQZ5TN+zlfKIZMRg04X4gb1L7 -hsApwhdAweAdvg1qEK6ITk5cspmex6eO9YtcAx7axS7aWwl+sJps8LC7TQmhgF2V -PwF8JTLx/z8dMNTyOae4Y7jGlTvA6pyQmEcePCelLsEQ634z+tcTQ6jarV2nCIof -Kj9UzsKh0hymmyYbAK8Q2e08VNuqWo7TJuwRgdfgkJi0QDPYyJEW0bOM9p4kDhRl -wxwXkfU3Vam1zQL+w7AWbgFPOMlhT3x4AVcDvX50XJELQqspvzbeI4mwj6uZ2zkD -WTeOJQr/jkZG++FjnGCKoh2ANb65mOjrnpfXGWwxDqJPZSTtA8hjty5x/ytgGrcm -j9cDPUFpLoDrFHz3fHckQFhlGHad29mmLsqfeZvVFVCx2mAxL3UnU6/Scy+XmQIN -BE/yBa4BEADyM5dNdN59KKPpXEW8IR8NbSpjGocOwHo6AcRTCxFMm2PndxC2D6IR -d33SJvV9ZLXriSa23TmBOYwL7zu5iKBjBjRC4eAfu79lj7q9lZoli10ZFy2QakD3 -Be/KCSDGVaqTY/51VMgrUHyA6de6I/phBlfKv25GSXnhuv1V/avzKMe3Qhhpto4z -Du0EPigbbt6wGiAnOWmaYMcaDYG6ao9NnRcHhlcKGo1L9twNceI10CMXWUtUPHto -xoE+yA5z8r8Qs1z5PVnLC4s5PLshSsEgVvNxgNqABTtt65fALxVSZ11yTVuiSTjs -X19RMs85Zq5ioh74aZRQrS8muPiMcqtqTrVXlr5MGRfA9o1sOz45gN2u9NozZHUi -Yqh7d70uLT0PN58qzqJDIV9aLeZKXO8sMPNuMDi9nVnmP7nt3jTAoDQmokItGFLN -cH29rSfz5mCKr79M1TYxAKYiD3FuQF/nuWKBshC9XN9IKTxD4z5k0AgpwQcZtQh0 -iFXYvP5UOUXX8obYk1l+NbqgA73pe8xeHwawD5EVfKMvjOUYslc/MUr3bUVNIW7D -ra4FjZ/7WMNEXWhntjPAUpHCfcH8GU3C39O8mea2vY8J6ntpvpOWjoxYqlSK6GeR -ga4aZpj92YIiX5fX5qAsK+98ebVDGwTgGkKDzHCViAnBo6y6Ponz1QARAQABiQIf -BCABCgAJBQJRvheVAh0DAAoJENa8WH8fVp79VQ0P/RMecb1kMsYPU0MxISLHkpyC -xLxKenXwKdWoJIbZqe2cIa7RjYE5Em8MQB7AJ2+hEMbr+xv/BQoPCn7SaCQfmaTw -zF3JJxNXtRaiLGtmsVKDiSvjNemxyqgWVBwcGuBRbhQc6Ij5ceMB2p7Fpj5RRKvl -Nhw+M3OpmtZvChxKMFtoPqrTXAOgQolCfpaiROx+7mdV245FCCLdzoMQtpSw4wxL -v+XqfwYtnLnKZ7s0jmFbFhkel9P007J7s0LbJsgboeYYYv4nXLxM+YFeHF8O34sA -fqCItuOqJ0spRmIp1NBQ7zetXFJ/wOugSvSgZLrURzzWne2097XR85W0Z31ts24f -Ae76R0F+ho7vULqsmc2a6bqsSuJr4IXtwo7Na9JrrU2hfzStMHyFdPxJFehUU/cr -t5+Q3gecF/Nom5WYiPlLjVQbPFCTI2+3oC9nrsW+Ho1aSLJ0hBQwcePfsjKuLZQb -316oO/xgSTgpcEUlomXyLAZ+BzC3QRTa1rDsi/R0E2mu+kvWjfZ4K1oKcRmZs9me -jKwgopVEiw+g/uAU0yQtsP/B10Cd3dTzmv++tlWaQEOS4bEbhYOdiArn30nYJTh1 -9rpkHB+My/wxfyRMKXC9kjxyqaLSEAOZQAMhJlHNfkjkhAXfgUk+my5VeiaV1CvD -+oAfrA0Z8sq7sfbKl0XZtB9IaXRlc2ggU2hhaCA8aGl0ZXNoQGFwYWNoZS5vcmc+ -iQEcBBABAgAGBQJP82NKAAoJECVa31bDbF8Pg/gH/1s0H/6iutv1wPUzGAFGIODE -TuRQGCdMZhnUz/2owFqHCIvLkYHWHew3MhG5N3qbcPCtegx3JkCffuoM6giBLvgL -EnQjwT5vRVvcrqUUB7e9AmUB8lnP73/r3fWQW+1HIcGwozKc53GbECoVzqCdCBDC -P38imHeomPyQC+OpFtCKVHr89LH0tQ+k9UMyTXR+WzAQqixreE4S1jbEkpP4rdxL 
-vFcjtDe7+n2ISPqO2dzirARNbdKy2SprIzM/1pyQBjIN0Pz7p1nHVV6NRIhrxZO0 -6a7Fr8ZaZq670nSKpTfV8dxohXKB6TC6SGGRbScElruj3fk6MfPADZwQ7GwAZCuJ -AjgEEwECACIFAk/yBa4CGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJENa8 -WH8fVp79rqgP+gMv93BGX8hnrd8Oe2CQMcacjIarVXEb5FqEcGFeLhRNeutP9+Wg -wv6elTnwpEhSEtW+kdis+SqE5qP0zx4M46qe0zyfc8TTqm+E2CojTm/geTIYqqWM -xbkMknm/1K/2Qw/zxBvsIDh743xLBMwX9EpYK2G5z1vw9i6Y9CTkjbw9bXz5KKj9 -7dWce76GUY8q6iDnUxYLNdjGNK5zegRl+OvfvGsv7COL7XIOHlK+DZwXV8RhEsu3 -//pBXyBFs9t0dP+5qgF9O0Y09I1Yf70oVWRMMoNSlc2vzXEii8wtI89HDxVqgGPM -WEy/KPwPY+dra5O5eu7utiEK6tF9c/PrP0/sjSzB3C+RFvfAQtqu2fnsevZ3nKMG -j9YEACV2/KrBHW3CmcPwuZ9ETUMKnsl5Q0zofWiQKMyVry96F5ZNBUKBunL37Wha -3gzZSiihiD3h3luifcI5/RF8Xxuccw9kv0x1Bv0VQOkwv+7rdaELhxyZvVPqyE5C -h9e4fvRAQWnSEHgEGtmEFH8K1tPTNwfrCfYS971exDlxKTBdPaQ/4vUIH8Er+FTW -cbSisgSX8BIk6NMadPH4YbG4r8ZsNpKs6dyiFBB1iwjGt9X4i7QD7yLr7p839I9u -9c5oOl9SRIIaSg9nB5hWMZPToTK6Eyst+6AJpEf8ExrHnWkRDO/VNNdeuQINBE/y -Ba4BEAC06JauJ+dskpopBnUq/T9ENxlqZ0sleg+2czEghYBiHoINFgw14Q64rbbO -da1nMctMcTrjPhG4wCXo3fFZCMzDBTNTASK89zs+MUZW4NonuS1F8CjSy9Lw7OzT -+kHuPDZNFihW/Y/UMbBj6xK5FI/xKx93+uYHlVoSZ5ZJ1/vfDRaqKVGFKgxSA1rf -ioSOy3rj80cVz6zeooblyalI39eex4iej1mj4qUxosB/LM47tgVmTK7GKutJvP7b -I88e35Rc2Hfm4/XvPJ6KwW8j5Yl2f0Tmxdddw/FJ0yYwcQKEikaGvAqnyZqpq21f -M5avvxChbmVyzZ5jQTT1mX1c5ffX67yghWQHnvJWNWyWM0qi6GcAmMzVZyk+rAiJ -0ePc1NtJ4fVRdiw3FgNOIQ8a0CicZt3xT+W9bbHplZnWypoFEb3g9v8OwwoClvJl -MzhATaASNuMo1yBu28zJWATcXmtw/Zg4CH6uBH4MNZKlNc1T3QNy2679fWldEsf8 -EJbU6SMCxZZ9ACrFHevn5CNEsccBMFdi+AXfsGf0HOtGvObs8YifkM7sIO06DxrD -d+qpCJEYKCjSHoCBvJxwTq0wPPfKHZkBX3GwXfqqH2w7NyjteOubgnrO4H4HNCTQ -C6R7RmEb/94c8albt8lAVmk6EH+fwqECjo6uEtWKHDKEdULM0wARAQABiQIfBBgB -AgAJBQJP8gWuAhsMAAoJENa8WH8fVp79s0EQAI1ukP1YPiW7lb+Vg0qeKZ9mz4PZ -f9wSA9/Lq0jQycF8mz6ZCDX168Fq39RzxDhjG3smsG5RVJ0PSqxP1uXM+YZu3BYP -e6k8KySMq5AInctVWTqLhXRsiBYwVIeJnUSqga6e8ETMRO61892wJ2M6U0SdFeVw -3cshWuHhhxb9R6eVWctecVdb2WJ8PZ+AJ9WeT3mooN1msLVhyZxFjvObqnC9uf1B -shyzpdx4PBBHm5n0xcpPorqXolPF9fUKqGTGM3B8rMTtC/KDe5pJn0IYPt9oeR+6 -vJxZ4sxaGICtQX5WjXnEH4tpi6whtzyXo8H2QEFt2qN56aJ/SPs+VpjdhAtPOpvt -auXJe07Ov6B6H3ePqAZDo4blIiJMzdijOESLk1BZC/PAzfNZsFkClmz7Tlz4+wQH -sbKwgGbg5ZyIxrzwSBUj9GsQJl0FAgKZe0L6v+wrfYARA5BeMrblQsii5xVyI7EG -GY+kE3LgzuitgZ4D7tT+ufHjONMJxj/zIIK/pejgccRRTLNZMe5WtmpC18hOI4RM -aEZu+6MxLFbJImIeWKEUSDBATyvFabEnyZGp1iHuZ4WKvogS0cJLoHymdFilvFNz -0oRyQAwvNHsuAWtrHEUwkyUR17IlDA3XVYMnqkK8Xm23zjP0PE/oi53OOEuJbxpJ -stAukccPZ/ir4OIk -=P10A ------END PGP PUBLIC KEY BLOCK----- -pub 4096R/3DD51430 2014-03-30 - Key fingerprint = A13B 3869 4545 36F1 852C 17D0 477E 02D3 3DD5 1430 -uid Siddharth Seth (CODE SIGNING KEY) -sig 3 3DD51430 2014-03-30 Siddharth Seth (CODE SIGNING KEY) -sub 4096R/63225E7D 2014-03-30 -sig 3DD51430 2014-03-30 Siddharth Seth (CODE SIGNING KEY) - ------BEGIN PGP PUBLIC KEY BLOCK----- -Version: GnuPG/MacGPG2 v2.0.22 (Darwin) -Comment: GPGTools - http://gpgtools.org - -mQINBFM3qrQBEAC3yTeDKsirKcHJYbJdmAADj5UMiCdAZrTeOCwMNvOWAsjJUkax -rrk8mNheUcGdjsFIv/mf6ZyWf7eGtiPUEAEUP1atzoOLpXeteaSz6N8x9QjLmr3e -eRd6nDmjB2nQy5N52zCqlh9tZ2E2MqsM2p7D1UHV8M9+pm9fjJ1R+JUDo02hxIAz -isnOfh8SYGGK3QzC3shNb/XHA3zIdGNN+VvsD1/3ab1p2CjfDk0nZQp2IhT8kSAI -jIPYcfhiaGgXOcVbBvo8/J0WYnjvOlHJMIT729Wp1mc5yj1d/PEy9831bUtXbGKV -f+Sy+y8dHVZ3t4ItMIcyd1GtgvT5LZpfdRO4MySk8EQGhlWB8gXPzt8wSy3GIHGe -av7wIcKfI90dXC0sZq6w74K0Gpgy3trlfKv7k7+Vp5JY0Brf4j2I239mZP5t8GSr -Wq4Kx2zdNiUf8kGBRJbno/qCsSL+oPtq3gTLuF44hRemAlQvDVk4HHp3QN5BWXOl -EdAt6U3NBCCaHTteELW8U11BIC8O2O3xkfZ18F+2pD28KO1aPGvUSDegGNOP1J2b -GXwZe0gFN6LfQ42pb5KNJ3NUYmqmqSWZbqA44YSgaZZPOjWN5supjvRAqc3SsEvr 
-1oBZ4KP85kk2ibC6b5j/USFjLoXFKRtPaDR0HPAVdWdm3RB85Acv4p6C7wARAQAB -tDRTaWRkaGFydGggU2V0aCAoQ09ERSBTSUdOSU5HIEtFWSkgPHNzZXRoQGFwYWNo -ZS5vcmc+iQI3BBMBCgAhBQJTN6q0AhsDBQsJCAcDBRUKCQgLBRYCAwEAAh4BAheA -AAoJEEd+AtM91RQwtl8P/0zRJXKBIzgI3k4cwZWXLZ4hf0KAnCqJkxlTLaah9nWj -9lXEaWJcBZcgEBk0yPXr7zubmA/glahfXXs9QDrst9FAQrJg2TN0qHuyiPpMFSQe -cUpolhalIq67rCHc/FjOLqtGXBGJVm+SD7AA0ODBc982Ckg0dIBbvj/inyC8FYbB -nqdlbr3NkNABSI0RmYKzJv3gY3JItBfJK4E1W2gTnUkLMRpUfGg+1q6OQs73OAYn -oiCZfeFO7rZDmr+2isezDOzJHrrk88fDaIKaxyohDRpMBqUbADkkk6BLkW7AtBNU -bwcKcUYSHcsvTHDMAo9fj8hPYHcIrKPAUl/ZHGfoB3YfPtEVxoW8qYGygNvS2dWV -EVw4+d/FVaSD9k6AvYg6xqGupgorO6cwpT/Av5ETGuQYGJyYjW4cno4pE057wv2N -TzdHK/mHf09PNJHmjcJi4BOdo4Qk9xmMka597LmxD8C48rM49syxRtUj1Nfx4uKP -7qdkeREvhXx/w8i83FcgO4568fvFSxLNikKz1GWhb2zmFu7vxYEJrMa87ELNRXMD -1EkI4cVLcWKJ/7eGL0zXdKaXazNQtI4QgoHLE8vCNY+KllgALxFlZ2VVJWyByJEW -b6IMElAne+q+vVMwmrejYxR4omzPPfpaqneSMsX0Zi0jVlf3c1yz73qQ+EzJZP2i -uQINBFM3qrQBEADAGn8vi7oie9ydI8nZIAxBwojXagg8FngV5dpAYs29dNHb6s7H -NndgQee6URJ0WgfzG0mD6hnNnkxj0/ZZ1QU+Act7IRlELrN4cJ8rfZkex+hxZvsE -FENtCNyWljU+T+EXYYjpgumZQ7HfJsr5B9ZzEbv8gypUnq+r3dRrfY/dEcoXQWYA -3vPAff2hCu+CDDW+hULbRlzYC3MgNf6VIrhr5MN6ou2A9x93UaeEMmDkHeJZDpSD -r6n1/uitU/TT030OIE9JWP7E+S4lVVsov5fXN4gO4lMlo3RuJWpFTb2iEbigGCJ6 -RNMTpi6o+A4pQEKl01wJSwDJVifnT1dgff+4d/qC75NxnTViRG9yZKEZ+JEtyKLf -CDXSEKtiITmCirLFLr8Ml6C7Nlp3avGu8IJKxabCZ39Sh81a+lRO+CCffGNh8MXP -HU1tIrSgn6Ylc1EqtlBj6L6b2yzQTK6gZZY/6ziEeOSFzDqbAhWCCWUmAywGtNYI -/nKmKl7FFsU4NwVa1pBq7gUePMZdKaWEc+yLnKbaux3RzI/k5fvbD5AxZueacTwT -a8XVaHBTAXridzChF265taJceRG4pFsMlUFUJ2jfBpjvYvUBOBQdqvB8LG76my96 -tzbvjObfaennFCd9ztdO4RoTUryWCXLeUEmtOGBJK49+cXQ1A8qWM18AIQARAQAB -iQIfBBgBCgAJBQJTN6q0AhsMAAoJEEd+AtM91RQwbT0P+gKFS3OOT4LTO1PfxOPp -HKCdE1242eAtgzIxU/dbliaUZ1bEH+GHTtWv3YYhPRmzLlNdsy4UDpATdvk3YPCH -AMSeX1PA2VD9HxTOpC02oju2nXSXdJx/F6UeZkGlwLWOCO8HF2zvbkTrTuTzTUn7 -F4Zk3CuhmkP4BlMTOzUQJIJ8DG1v8oLk59I7B0bgPfYJWa8+yrAO71J5H1AKMte5 -wpGLwNR12N3MAnfgUCPe1RPyVeels7e2L8Fu37skikxOLKc/ToTHNFCitdBdUqAn -Wjuf8PDPsWOHuYjQuAZnXbrnUUtWVSAkRUswJjQccbzy9fozrFo5LgpTSQdBYwyQ -ETzeAz7lY/JCzpOy6Xgbr4ZJpaC3qWINiJhqv15xpGHosHHDK40alwoqRg6Df1vr -gsjkfydU01cX142ynLq1fH5P7C+T1miVCl19q1GOo3Vmb3BKR6Xowl6cXuGGngMu -E1+15fbb9s5UvbhabCDrpVRlIwe3KT/nESb6TojIyo4+Yq+WZoTjK9+ud9R05LVO -KydE3pdOuclIZrt4peqEqgMeujJs0HszuywP2wyH6pq4cf5cxgUEhK7rasmmkPVM -YI7NQVFGy1JT4LYJBIabVMGa0wThz4PurbBoiJ3CpXlkYvyNIv68xIYsc7SndU60 -fwn9O0GpWzwQAMot6G4bg9A/ -=a1vb ------END PGP PUBLIC KEY BLOCK----- - -BEGIN Key for bikas@apache.org -pub 4096R/1EFF7770 created: 2014-08-19 expires: never usage: SC -trust: ultimate validity: ultimate -sub 4096R/D8797E15 created: 2014-08-19 expires: never usage: E -[ultimate] (1). 
Bikas Saha (Code Signing Key) -N PGP PUBLIC KEY BLOCK----- -Version: GnuPG v2.0.14 (GNU/Linux) - -mQINBFPyy8EBEACoetSafMjg2djF1PkUEECdf9PFiQxLXjmVRmQyJDKFNQBLjqM7 -6gg5AIsJouUSHe1uhFQXElR6zE0FkZFnIYsZOUfrIPKoUz3ElxcYgGDR+tKUXcTF -7IezEky2Mifl7tJG84EvRzVflXBAsxaCStk06FXToUvWaNYoIX+PkEBnSPPQnGQF -nd9lRJlmcp0QGMoGF4ihaLwGl25PTaUEnilez5RrQCHvmcLoUY1ib9wnA7KG5ia4 -SjUaMCST7ipPFGSc1lQ52gASKNKaHk47kqmsWL8fUJDAo+Bj/q3ykaD/yAdTTm4W -UheVWMfQJhqKU+XIv6773KhBh8dqViZklgBxX+K8yLfpp1CIOrAlQdnqV2LDdrKj -McDEi0ny8fPUbtiY10YN9IKYgrG3s/m8mGVNYOpgTriNu7S/CGv2S5docKt5MCM5 -GU5mhrw1qzzLj3/oEhyXP+uN90KUqAd6LApBYWm0w3K7XVNsMvGwyx2xKab5P+ER -mKxX+tJZP2E6bEA12ux1f7nk0jjoj62BRweQDYPFEfyn7kUwLEd/E1xVDA42dx4U -snfp6aHqKkPqXmzACTCxymwZ9OeX2AlDzgF3m+pWTIXJXCxkq75ekRL+QtwcYrLz -IyLTQmNUwhK2vXqOXf99txPr6zeCANcbrsqnG9sV1JpdZNtTaZOkEyvmOQARAQAB -tDBCaWthcyBTYWhhIChDb2RlIFNpZ25pbmcgS2V5KSA8YmlrYXNAYXBhY2hlLm9y -Zz6JAjcEEwEKACEFAlPyy8ECGwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AACgkQ -iyW5tx7/d3D01Q/+IGWT8oAFdkUSaG1NOVOlgCIeMQ4l/q+A9s7F8gM4o21j6Yzw -l2WoyOm/BPIpTlA3KxlDnx6vDJN0YnlMGTyeg1OFvKxEsYUhwQaeIHsuAX2/e488 -LIYjWJNJt3SmNeO6xefh+4892VjNMD+PeMUmXnIBZd1rhHC4NmwgoaBqnImTAnwE -zj/IAmaaco3YiHKTQ1Q2f4R79kNUSkRVPIr6aFtON98z5AIlxbimF0wW+J6AFzp2 -nMxqS8eJ8AxNf0OUvrglYngNzo5a4+qw/sPrX/BMN4SzDk0aL5nuB/mMn392xNvt -cSBjWH0wtLqSXaG+U+IswDubJ1VZIZOz0OPrj3G+vrDJ8BDyJEsgcvThBuJl0ucQ -aGNU2gIPGxQAXsbftpr5+E2n20+5KwrpyMerzkspFvmnhB/vSD14CwY/RCnxfIgo -IGrnZiCnRSR9zz5qAbGmEB0XVrILcRSzPbOEGjZLYjLl/9dYeELtjpGs+pTIDOxx -IVF54JZhPHQrUHqrkzryr9jyPaN8s92JB0MY5fHEbY0+tGnGI98qNvvJt0SmZkMt -/on3Zonp79pdRc40GxZBddCrzGbcYBrBnuSdpbuAVf9N4A/8EFZuaMxT2CW8Bbc7 -Y3VjC0H5VWPHJHIAj9dnmf2dcI+V9KtBKxzkEEHpuFD2B/BxLVp89muEBY+5Ag0E -U/LLwQEQAJrHfJ5pq13KTm/DtLYrtnSWRAC21QBqnnf52W+MuPEvhr/i+5kM9hAI -GjwbmyAY9z3PYH0wYdTAxcBuCQqfac1CADiowF669j47os5WSSJnkRhB3HRQRnDy -Wt8b6HDtDnjFqwjm6Q1KYlE4Z3qJTj33g7WOEu3nvk33cFoRRYZhho3nmP6SdsCO -jbFzZsToc8sRjGVrybSOyaUYJXbExBoZ6Hkp4SF2j13ORGVeK8x+ubluSm8txK1L -nv9TwMLBX/2MsuclH02GchXHxEZGPIEYrkaq0Oa4hhJhgEzRsp80l56caju0WCmc -fgdQQeBbV59QdveTcWqVUU1kxdW5F4b0ourtU9+uFxHT1SE/2RmYieMWbQ+4jnVq -KNpxnb5RnrD4AEfdKno/GOevS3jXbqvd6kcMuxFOQxgaYNZBQBQkpkvwMIE5GEso -RVHcxtB33QQ4xARR1Ior/fV1r4uasC7Ufg9qep9SCNHyHR+6JT9ylEgCCMnguSLj -HMy3ZXBOXuDfOKnp1o/cFStY4KHDwVlLS7gxrB9uEAeQ27eQn6Fm51ODEhfnsLUm -ic1P0NIPJ1xqA1xLexdp/tEgPHi24KQ0KrG6oihnGAWFgwXBYtzHg7CpYZK8pg71 -ebT9fuTHBkFE5B8Ov/PHhHI+PU1SHipsg+vEuuZzhtJFfHQWNGjHABEBAAGJAh8E -GAEKAAkFAlPyy8ECGwwACgkQiyW5tx7/d3DCPA//dOk9uLklimDbmawra+92NH2I -ERTmMDjIB7C1HVBSVSLI6oLrZHrSj3IbFzFDXGA3AaLdqhFBD/VMbfmL23F2DJX3 -FjPth5YP131ZVbrU4IIQoThSGx3KF1Xd0QQWgL1tuOkgMBLLyXjymwrgv4N5fTNc -aJexCXTFkCv5kj2B0UF+M48PrpycJA0fCLMGhUD3fZpYDHGfZxobkD+1R0qgGF5P -0nQUROjtCQpW+/TT3kTu1EnmLLw7TN9YMIaUUBTmWgglylByIvLhQCzwWuKFte9W -BxYqRRNuV1B0LO6xTiqVvRh9KzW2DZJUTPUmR9utYtMa+QjzD+3mDk+bS8yAhW+W -JVWzYqzLeZ1vYeDzf8gIO1qVusi/xfxe/aK1SifwhqjVrhw1WDDlL1DkIsPmN2fz -HL3cPVGCCBlBEhNDaAG1+tmtj2aN1agOhfARQfuW6oT579yi/NPvzw8xqc0aGYdc -//XwgBLyFT2vidzoSY34+iU2PLqOivZYELsZ/Mz+9yCnbyu4bXXvkW4V7TshpPDN -6iiLfbmzqv0PygHeYfUISFpIzzEe05cVAVbS17Zm6MCQxG2yekgLBMiyaQQ8O8Jm -BHGU9crYO9ul985hJ6p9UCsXV2dYaZgYMEeE3Is0Xzwaeu07z4uFrK5tMNu3qPGL -UtjBC0yAoSJnJL8bVQ8= -=ATMO ------END PGP PUBLIC KEY BLOCK----- -END Key for bikas@apache.org - -pub 4096R/EF9F98AE 2015-01-05 - Key fingerprint = C2D6 378B BAE4 DE65 BA40 2D3F 7CF6 38AC EF9F 98AE -uid [ultimate] Jonathan Eagles (CODE SIGNING KEY) -sig 3 EF9F98AE 2015-01-05 Jonathan Eagles (CODE SIGNING KEY) -sub 4096R/44223F4F 2015-01-05 -sig EF9F98AE 2015-01-05 Jonathan Eagles (CODE SIGNING KEY) - ------BEGIN PGP 
PUBLIC KEY BLOCK----- -Version: GnuPG v2 - -mQINBFSrB+ABEACvC7Kz9RUbL44+ahhxZE603MCRhqbFUHx/anWWqusONTJLbVlq -mu6xdh1cVR5voDFfQ6RL/BtrPzWbxTAr0FybVK3SnZY26TtobzHyPZkv1FUSIdWo -yLP2dcZbv/Z8WJTKH1r9VLld9n3Wl1maYciaXGRwjy9q60ZDBbgcGeKiCoPgKIEv -MOtnrySOj8HdrFjzsYM6CHshBayaND4KQSoq6zHX+6cHoC7bVUcI92y2YJyCvGCw -xrzwuq5z4+u8Oam0vXeAYO597aBte8ZSCZvAERc7l9562OPz5Wi6fXYX7ZYZVtnL -JEy6vsbRPqHPktY53QfDJqUNVDMWHRnkBt08uGUpPxt33hmCQGb87YEUbeh9A2kf -i69HrZ349mPyVt0jpYaAF4Ffbhk2XfisClVP9iOfOD50Uy0WYny25ouBGXT/MWpz -GgGRpl2bmRmjlqcrbF6mzHwtULgQrMWgt+nM5bbFUYvZDKGefzdbmL6hd2eIGEZ5 -Oki/twV6yaQUL/StiMjpDKlz5v7pn6yxQ5LvEe81QOvhKLzgx4hPVy/f8CYnb5zx -z//Jr/h2+0dIQV9OFyfiahge7rWRKbHdadL1KelzvwDhICgrL7oD3T4HUGlD7TXd -MUPyzQwBEEZU/BWISUlKY2GiWDEoxSzu8LS4JWPvPUB7/CMdUQunJmLgvQARAQAB -tDdKb25hdGhhbiBFYWdsZXMgKENPREUgU0lHTklORyBLRVkpIDxqZWFnbGVzQGFw -YWNoZS5vcmc+iQI3BBMBCgAhBQJUqwfgAhsDBQsJCAcDBRUKCQgLBRYCAwEAAh4B -AheAAAoJEHz2OKzvn5iuqloP/RZrU4uC8+rZ0VL5U/J5hSjn9O7xcuXN95CuDTmD -NvGCubOqZMcntzdPtjynD/moVVy0iC72a6UlrcZ1eGHZnp+vst0ZocVbMnN3fyWM -vuvQbBTgijrKlgaLuF1bH2ZYF5JBqCeKE7C3cxzsFhaKv2qUZKQ08jW19F63fWDn -OvXX5zvcsHWMpG3++WJGYxd9CvljKsYKQndbqAXbRnU3Em7CaM6R+2Tk9/QD3ZJD -CfBi3F4miaCJR2pZ+K8mMk00x87U1SkssPuPmfoX2cGCy5WSe4x4IngMhTL5Yhw+ -iKxmUIkFLSbYE3m8vr1/zMgiRoFOmp8lURTbl+JiNRE6q7/yhERBiBuk1WF0GJgR -yKIhvypQfURZHE44WxXI7C1EZ9ogcm24rMz1Lah5oBnSVrBZ0dt64YIPtf0EamkA -IX0iEkjq+LKjR3uGlsc9fFgEN3qv4+k/V1Qg0hXOANsKOtCH+oO6zyH246UHQM0K -oPo9miXMQyubb2yyJYMTSjxP5tGhtPeQbDEyJJwCI3WyiQVhfKPuooreHy1DIqQC -TNDqmfeTyeZs+HsYIR/nR7L7qK4MCAmljoVA2tec2ryCL7bALNFifoi7eVFKWDzq -iUOwhlxJ8xQF+8bmbnuFQRC0i3XlIaLT/UI/Svrs9eC1Sv6Y8c9Tn7ZZoFoH9hrC -Z9MvuQINBFSrB+ABEAC2dqyGTcSeV7RUXahOCmV0CmUG3B/Cu2i3Phx3xAVYnD29 -J8dphzCmTYaB0cB225ktQm1uq2z7S6Kl5Vb7qF1RhxVgCEInBPkbdXA6EEjA+Nwc -/q7Mj2AipoTg28Pvk4J+e6GKad9Q9j1HK0v6Eh5VC3qzUY+BfjVW/yTRUSKzIdQy -ueEbKkUl0VabGFR/bhpFu37Las7yTBN7jhVOp1bpUTS9g9XgsaTP5tjzglNpIIvu -DZwciFKeSkdiNreqUbqfMoCTUCIets2E7op8nC6EuDkoit87xXX4DjRpepqwj1EO -ZXnd6jRXPE+Pf+RrNzcPs9VTrQJtfIiDdNzmkuQEjyZWYuMhxhac2iNBCH2C/bpa -mcJjEDz5MXM19vHdUQhFwI2lxoXDBjj7jbwx/JHsqiM0fg17xuH6y64l9ps2+7JR -PDET2eBwcTBqOkVADoEJ2A++X3CHWYQt0SrvgxUrz07xMZvEMrHajHTDezeeUEKG -HBqKq8LZqNCvF821spER0vxYI8S9ihBzMUBYTRWpgSxjnKAr3ASH74ZJ1D+akhbE -SA+pbPRAqW72JlXY4E3kIjammoYnMU5rP3iZOy3AFZSU81QOEwkJ/RQZX9VIgwt0 -qR6uxhAUz9HqXCe2mNQb/igjYgQbb3G23fQyXS4FLig/zUakYVF8Nhi6/BuB1QAR -AQABiQIfBBgBCgAJBQJUqwfgAhsMAAoJEHz2OKzvn5iuZXAP/0Cz6CSDgBAPqZmO -WJ9vQ9vJxuovaeqKT4Mm1EuxiagOt5AvO5WpB3X+L4ptXHKTK6WKD1Kgbx3xgCgj -AamNIkan2WI7me4+ev0XZ6xVyBrfEJgFyBoYEiKEFpXJmv4hziXzGu1MK4ufTSUd -PYMdS2yAGQ37AQ+9INKdj49ibt7EN1SPQB/K6jE5b9UWo0oTiEmVd+uHqM/nw6rO -dMnHUPhlb9ZBeRohJSzA1vivn/yj7X5mk23Q7ivcMJmLtbH49aJHuG6tKRbK5klB -WazucdAOAyzySEk6nOU0IZdkvkv8LOA+BsXwrZ9EUIzwtQUxtYaIsr7yYqDiuwsR -rmgg8kWYjxv7Jg235sS+jotu3fuEiO28cfEbQn7SCYPlfYyiYNjZiyxfDv2t4l+0 -d0BOO6ojluSz0caJl/rigDhrjpjWaLRwoNQaRCrJ/IZWsJ6/EjUs7oP7YVV25jxe -ZNHtzMZ9i1UiPPPf5ABBFmApn7SAdKsOEnHRr6VqsFnbS2NumBh9HNGVyUi0OKHQ -iyIJetptdkrgVNNoa0bXnT6hTbQyn593C27zWxzQScMN9t5cjqCS8ZhmVkcqRG11 -oO0GPqlIAAp+GVnAYF0ttXtPZdZULObczwwd26QOwZW9Spdbgvr/L1j73z6Z3h9V -7TC3x8qW/hu6bKJINt3Ekcw8nr3e -=xGxF ------END PGP PUBLIC KEY BLOCK----- -pub rsa4096/D9B17D1F 2015-05-11 - Key fingerprint = F64E 9745 F021 08C2 9DC5 17A9 090F BE14 D9B1 7D1F -uid [ultimate] Jeff Zhang (CODE SIGNING KEY) -sig 3 D9B17D1F 2015-05-11 Jeff Zhang (CODE SIGNING KEY) -sub rsa4096/66587CE6 2015-05-11 -sig D9B17D1F 2015-05-11 Jeff Zhang (CODE SIGNING KEY) - ------BEGIN PGP PUBLIC KEY BLOCK----- -Version: GnuPG v2 - 
-mQINBFVQUQkBEAC+yzgmDQ7ZkiQXHkanz1e5uoNvpJtynZrYxCa7F2igJssD0/27 -6mKpxXtmK0B3bpCDxynnRs7ylwsneUA6j+C4rUn+95aegeNkWxidFVIr0Om1Swo3 -YDimtXDG6qF5fVkNFfhGFHk7P8TDzE1Mi1OWYRJ6Iyt02GgdwQZfmx1jbNZ/DD7K -yZD4iPww/2wqxz4HANAkT/o4T521HuIgWZYfi8rWicK4GvA1lJaRgDgSWxfnWnoD -ZXX1rzsQiATOs8FSrahfr668x0Z2sUHPMSfTTGjqLz3V81LOFEPLIKOv50uIZYXU -xUeWW1XYWrhJydx3tdVk2LDEQldhZYxXJHmNuwYXuW3OymdAuqNNKY5sb3852hQz -34DrxiwHn8NKjFjw83RgGgenIidTQ2jrc8KB9RXTx4nvmtkclJJAbRTiV1GOetE2 -SJp+2d83SnddcaBGoIgm6zFWAZ3gg7GNAJtvAUhjXFFvpXTvumtuXRNwM7jmQGLX -veCtTozwZZnxULrNr/mSEp9IMpQgMkSPlPG1YzWqEcYvXKo1Mxp3qaxHCxHiImDv -PTFUi8s1v3xFFwIs+CfMLNp7RGmhyy9oYQCZoCBx2SPAwTt2qS2e1XEczDjSA+lm -8csL+2NSEUDuxRI+ugtC9HPfREql8yHEVjMtW8M1k13MfmHHfKGE4+ywoQARAQAB -tDFKZWZmIFpoYW5nIChDT0RFIFNJR05JTkcgS0VZKSA8empmZmR1QGFwYWNoZS5v -cmc+iQI3BBMBCAAhBQJVUFEJAhsDBQsJCAcCBhUICQoLAgQWAgMBAh4BAheAAAoJ -EAkPvhTZsX0fErYP/3oY4y4uODxcZ393C/JCTC5qfuz5mlvctwUt7wLU+kBh6k88 -2uChrDfp68xpNFCBRQF4/W62T2eJceSvWa+/O7Jw+HFYrdkJka9gAflaSIxhvRLB -olOvt2CahmoTgIE4el6hLpK3VENnlqNUcGqZ6d5wmFX/VMAYsUH9N53dSeQTlf+m -4eFP3HDpZLyYKwpgA6Vmoxzoc68jbzPjOqvmTvFdPoilDKQp5a7Q6ClNgeB/QEoB -2RqNmZjDblK7B/JPiAwGV6+68P7LMR7FrBeUe3QuSnfBTFthRAtoczijpm7HH7vQ -ofP4N5qcVw9lxID9BSQT1l7rn05ppZHeuPOhl1OmTcv6RrnfK93QX6Z0niVJEeG3 -TxbBsQsYt32GEpd3Ts2zT/yTMFXDmV6baiPA0/oqeFwgzGUeNnVbP0hBMrzi864d -FALZSTCabadu8JKC2qMG4egpRMI3aJ3hHIW+t0fgYFIiY4kSRpmbsfQmVy/7DfK7 -xgRNRo/nem+4Q23L+Xxwb+mjqCozfQCSmqhwXUAtv0CpJkBJ3gUlDLnhqSnxKAXf -aZ4MYL25wiWmcu1P2iI156c7fKYxhIrcrv30UvzQXrtwTRC0Ymfb/I8TI6EjGwiP -IwtmCwqB7MQMUE5Fo0IioRiVAZAayFWP9HcGCYspHqZjPLdqCdNp3hz+d5CjuQIN -BFVQUQkBEADPWUl33mOCNvTStV8z+ux+gCTHhAfm7peYuEDgS4JNUzLstBf8BUro -PdhnH2t/KCW4PHrVTlO/NO7dHfg8DhDKByJrEwpsS/crCC53xTBvMKc9CNqw/qlJ -FRn+0cONOUALvGX/ObkSTlOT6IITXYPbrVtpV3olAto6MYQ+nvA56rqkaOUxGpS1 -x25SIw8l5OfCndkZISk6huQ97WGUD+diVdX7tmnOYMkG7vLkU5IvxUnBDlxJkOtC -uyNOMdDvTE65YPyUxqsjCkbq5jbAu5Nndl+h/PMQ0Vm7NSkfAvZWy9LY8y5rcI0d -T++6s5goo0bz6xD5TGjQmfFXw8mLnnMDp0kEeyNLY52uwqM0MNpQTmzHiRRoNYFp -AnnxY6oywJNTdFtI0XRLLgD6pK7DQOKwFaikQI5cgAIYMZf0pcMVyIgdmyx9DrXy -96pQni/g0av9fpWzAzvB8YBKyyS4FXj/CzK1BsQJuEw2TlvWV+SVpYMcvtHD080g -9UaBgPbakNM6CSzI6a2uQm7ZtEushC43al2ZruaHZOT3NHAofI8Ro7AC0tNkkf38 -vKST3DgacQfNGmroHTF2V0FLmpD+pSo1oPksqIS38/C90JbyNDGxTStaRb+yf5nz -LOAwRCuPUNDOK5o3PTzyr9ROpLgpMmfCl3gmrIzMtQ9Ixo54I6zImwARAQABiQIf -BBgBCAAJBQJVUFEJAhsMAAoJEAkPvhTZsX0fw6QP/RTZktz8jJ8CwnQY/+LjDzZo -Xs8+vtADRhhk0JB0q1sutGEDxdkGLjCXoD1K2wZrSrxBUKqwL7Rpc74DvwAMhcpA -uGlQanFIQzskCEmJXmJ0d/Twft8xINRL/mHok7yed8TAiJjYhbCkwKi8nXwvbtJ2 -B8wIcwxk3oN1OSa6149WYzcrhEfFmHcIEg/cHCsD1R+b9zJrKqudE8O6O/AIJ83R -s+XMMKECSHsfCsI1qZW0HXFzLMNw+Bk7m/2gRMwtpe4Qqxw3aEZ3D4jeQtgPWb5+ -RNlKi1DGUL+9tCFx+RCdYaF1dtchnsn6F2KsU3WFSHwKBkeRdglnBIzKjIEMduKf -P7SlyGJ29jlxXvl3AiYeSYVEihCkwyUVSJdmEUXfA17UJGFMKVSfm6TXTTTJza1h -udo0J870SulYejre6zPTHp8daVanwhoQMThMiXWu6rGU7IHAd+StTdaImbbQepI3 -PCQA5i2r+Hqzx0Eq958PLwTHXhXiU8vTqNDrP/XBJehD6LtVaNmdYzybmMFi2pdc -RCjNQsjCG7O3aSHzSs0Gupwa/NrQA4qT1lYdGbkbt09f765afoYMO/jA1jkuEiCV -n7RjAHhkZPRkSgFAW5MmkdpRjaWWHLwPWPWzw4+B0kVG7K0rwkH2QzG5RNiiLpYz -/WbM6Bos9jOUGBmt90Qu -=a3sY ------END PGP PUBLIC KEY BLOCK----- -pub 4096R/4BC5CC4F 2017-05-31 - Key fingerprint = 5B4C 5D7A 6D53 E269 038F 5A83 9388 FB14 4BC5 CC4F -uid Zhiyuan Yang (CODE SIGNING KEY) -sig 3 4BC5CC4F 2017-05-31 Zhiyuan Yang (CODE SIGNING KEY) -sub 4096R/59CBF136 2017-05-31 -sig 4BC5CC4F 2017-05-31 Zhiyuan Yang (CODE SIGNING KEY) - ------BEGIN PGP PUBLIC KEY BLOCK----- -Version: GnuPG v1 - -mQINBFkvMxABEAC0N5fgEnoUIL84Zxg9KCXAU01LHiTySPKyCTC5EHyGTj7f0Y1I 
-rmFBc3lFgi6PTTUx8TOsUhfYwJYjBPx+aVVZrWk/zItdzxVVf/w/vTZCF5oqF6EI -wrTwSgZ8QxL9FWw49zLtMeZLGWMpEmRYNFodHyp7QxHfTghd5u6jDj5l2MBUNdkc -0FrTu9R0GaAXB6qtQS8qyFSHarfSNYAYoQGWroHtkI6brzpm2hWEYZA1Kj9Ge8DE -GpgSpKWnbW6vANSRrEglWMuH48i6eQgD0v6J+CltJYlvedbO24XmLorfpVgeuTEl -izhM5KEur34ePKPxT9nf7hhMscXBm6G+vsH8QUcuwKA/qOryGLc6qf0SkOPONqg8 -tL45HX3Q7WlfTRMvMEREeqIVsfffCWhUAHqI4zJlPAqktmfAWBlUf+mcQNRQbRw9 -lYD46XF5QnPP3Bhoz6PCAVu01thVwXiX2IAI5GKinzJOdePgRJRc0t34cDDdjyt2 -9RpiOmmMEp5s1Kjan3/iDIsBenCVTEQKDeQpa+73CD9SHoYMftbyNBpvX6xzY0Np -PBPQrA+HLXoAYYfxClSiVpVHYIgBJGrqEckwAY4LbYEqj2n74GYpqxmhUMhx+SLH -QomSKhfRCyGG/qDxOmTlIX1uzo6XaN5gCNdGS5deQESgRqopAEp2v8dxRwARAQAB -tDVaaGl5dWFuIFlhbmcgKENPREUgU0lHTklORyBLRVkpIDx6aGl5dWFueUBhcGFj -aGUub3JnPokCNwQTAQoAIQUCWS8zEAIbAwULCQgHAwUVCgkICwUWAgMBAAIeAQIX -gAAKCRCTiPsUS8XMT7SoEACX1kgxnpfOWiWVeS4QB3pWVpP/VmB4qjZNPY/0RXYv -MaYZ0HENCHrJBEDgx3iqMkCRj0SY6rqUGHNRKdtLzzsLYU7Co/IFz4D2xAk/xqjr -z6LrLr56EG/lQVwfprJoEg71dTGzVJfYWJ1rQvoTGEvIZakHZ3OI1gB1oG6P9Fe/ -0CoyXkiifMYRccbILQLhfiqx6ZAdKJm2Obr28GVwOiF0TdRiKxi47bgsUd49h5fD -L87bTrNDCDc1DAN3gtL2uPuYr5YwQttDWHAKqIIazG8vWcrHLvPQ6837+OEYu+Zq -s93nntGWDGM7zYoVMRZ5zOMTSPOuh5MHJ1CD7VSFVCCs9GtyPRRqU235if/U9pei -C1N5fOv4greQwMFmAbgjNvOVvSbAeo7ot/byk8jlMFELt1hveRijbW7DFLeYHo2N -xxRjLqSgac38V+nWoWDqAYxEVXkBqzkijhrHqYWozMeHMNTIjF7ho9VAptsItfBz -hrmXW4HlvFeJBBjOX+my7CAGa89YkmP9H/MuCzhqFSOadleQ+pqph8omG+kjR6aj -HgsIs2aOFLDz6Y9JB1WYFCoRvuGj5ytjq/H04r0z3Y9AVSEkUnNLeX3Ugyje+1+3 -YQyU2RmREe/dd+cqessmlOoqzAZGSzh1vhEO59IL0BXQd7I2d/2crQIfz1yxrzZ7 -57kCDQRZLzMQARAA81I/XB6HMN4CX4mav9xqakyI7WlKiwzHGdbdxsqcwu343hlN -fDn7KRH+fzqzw9XujsG24nLR1wKHG7hUB6rzRJ5iz7xfpQ5pa6tcLMQTk1QkJNlL -fq0NCK2GAHshUVMBlRXMeiHlf2c//Mwhjm1La4dd5eL9lrHEOXZOP4woc+ngAgKW -PctHCoih7Oo+THKY1fFqde+GYEW1YrwlLcDQP+WY1D0l3ryr4eqZmOP50pMNQsmK -P1mp1Tr2DyPmGQKI/mscZkfWE023Iu1RCbJoQ7xnHV8etFIYaQEf5uC0JQjnZV3h -Waqc8RqbZA/fljZ4EQar/BFDC6pJXA47gTX7GrLaOKhNuBtNCmsg35ROXBT9YvA/ -29Fw1a9ZBuEy9FpCBCHU+IAFrOcSYBC/lIkSWOO85DhPMjxIRuv+z2KJmsOdB8xu -Fzrd1SUGCONcw/ApAbtrkkVvxe7k3vkqOa+NYGDLIKxd3jGZZeBNgwK8D5V6uS4u -eh3pf0QU3UJBqmeuEdngve/Wwdu6FWwns40PLhRzQL4w0uomgxHJ7d5iah5u0cGz -hiagu6RUAs1GWU+ZlTiVL/2BMN34BQ6auYpOhDiB1p/sOGac6+UmjchPyBLcGsdp -O7bxvuvxxstB+QoQnl3zSKkTSTeDLEMP509pg4WJWfbOkn9zQIJa8Wf5BVMAEQEA -AYkCHwQYAQoACQUCWS8zEAIbDAAKCRCTiPsUS8XMT/+MEACWU5Ivdh4PjoBmxpHJ -odaHkkYR68Y5v4JSrtCUOrD2/EkOUKbSTjMOnengpvRFv+/6eYQeTqMfdlU97WlE -B5cuICEN/jVRxcU4HKRzSh0a4wQqFuxyFNHQsu1H51PBD68pB3nJWTt1fDdrefSF -epXZj9CyW2PM2lCYJIfAkxLCEJevk0ZJRrH4V2hy8iEPQOl8arXsH7l/cT3zhSJP -1BS8+kjNG8Kn5H22WSqxH4OhafxlvEMnMEMQZ+Voww6YDQQjG8Q13438JTy94fw4 -+8aP6cON/YQzhRK62PhL+0FYmqTAC/WlCBjnl7M2XhfgQUjCdJIx3EZyFLCOxitN -W+myzL6te47wJm9+IE8fd48pH3nvhduvokFOLt7uKckC0XCth41kjCK4iDA7T5G8 -v1kpx7z2KJvttvSe+UkSDVqENCkDqM04bDcq7HaP0S+jjC7NOzLz+XyFsw1CN0e9 -N8MPZTB7FO5yWXeU8gUlUsgyr5dMw6NFLiCUSeWanTf0UkJpIffadV6OeGFYE/4P -HdS208wOS9L5n9Y/XhyJBLnvl3OR9N9sUK67MzR19/nmldZQr3jMfseZsBWk6sHI -Qq/U6wJoMEY/jqYNH38nKVv2WCKUGqRC6k4gQYA3DhFr2vvJJzEwku3SdBB/Q/0m -IRkj5+CIG0OiKX+FQ4e1Tl2RaA== -=jxBF ------END PGP PUBLIC KEY BLOCK----- diff --git a/LICENSE.txt b/LICENSE similarity index 100% rename from LICENSE.txt rename to LICENSE diff --git a/tez-api/src/main/javadoc/resources/META-INF/NOTICE.txt b/NOTICE similarity index 69% rename from tez-api/src/main/javadoc/resources/META-INF/NOTICE.txt rename to NOTICE index 3f36fcc6ba..2595905699 100644 --- a/tez-api/src/main/javadoc/resources/META-INF/NOTICE.txt +++ b/NOTICE @@ -1,5 +1,5 @@ Apache Tez -Copyright (c) 2016 The Apache Software Foundation +Copyright 2014-2024 The Apache Software 
Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
diff --git a/Tez_DOAP.rdf b/Tez_DOAP.rdf
index d50e5ab602..257b1e4d82 100644
--- a/Tez_DOAP.rdf
+++ b/Tez_DOAP.rdf
@@ -34,6 +34,55 @@
     <programming-language>Java</programming-language>
+    <release>
+      <Version>
+        <name>Version 0.10.4</name>
+        <created>2024-09-15</created>
+        <revision>0.10.4</revision>
+      </Version>
+    </release>
+    <release>
+      <Version>
+        <name>Version 0.10.3</name>
+        <created>2024-01-31</created>
+        <revision>0.10.3</revision>
+      </Version>
+    </release>
+    <release>
+      <Version>
+        <name>Version 0.10.2</name>
+        <created>2022-07-30</created>
+        <revision>0.10.2</revision>
+      </Version>
+    </release>
+    <release>
+      <Version>
+        <name>Version 0.10.1</name>
+        <created>2021-07-01</created>
+        <revision>0.10.1</revision>
+      </Version>
+    </release>
+    <release>
+      <Version>
+        <name>Version 0.10.0</name>
+        <created>2020-10-15</created>
+        <revision>0.10.0</revision>
+      </Version>
+    </release>
+    <release>
+      <Version>
+        <name>Version 0.9.2</name>
+        <created>2019-03-29</created>
+        <revision>0.9.2</revision>
+      </Version>
+    </release>
+    <release>
+      <Version>
+        <name>Version 0.9.1</name>
+        <created>2018-01-04</created>
+        <revision>0.9.1</revision>
+      </Version>
+    </release>
     <release>
       <Version>
         <name>Version 0.9.0</name>
@@ -155,8 +204,8 @@
-
-
+
+
diff --git a/build-tools/.gitignore b/build-tools/.gitignore
new file mode 100644
index 0000000000..adfc42ea48
--- /dev/null
+++ b/build-tools/.gitignore
@@ -0,0 +1,2 @@
+protobuf
+
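
The Dockerfile added below is the image the Jenkinsfile above consumes via
Yetus's --dockerfile flag. To build it standalone, something like the
following should work (the image tag is arbitrary; BuildKit is optional but,
as the file's own header notes, makes the multi-stage build much faster):

  # build the Yetus/Tez CI image from a checkout of the repo
  $ DOCKER_BUILDKIT=1 docker build -t tez-yetus-env \
      -f build-tools/docker/Dockerfile build-tools/docker
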
+### +# hadolint ignore=DL3008 +RUN add-apt-repository -y ppa:longsleep/golang-backports \ + && apt-get -q update \ + && apt-get -q install --no-install-recommends -y golang-go \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +############ +# Fetch all of the non-conflicting bits in parallel +############# + +###### +# Install Google Protobuf 3.21.1 +###### +FROM tezbase AS protobuf +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +RUN mkdir -p /opt/protobuf-src \ + && curl -L -s -S \ + https://github.com/protocolbuffers/protobuf/releases/download/v21.1/protobuf-java-3.21.1.tar.gz \ + -o /opt/protobuf.tar.gz \ + && tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src +WORKDIR /opt/protobuf-src +RUN ./configure --prefix=/opt/protobuf \ + && make install +WORKDIR /root +RUN rm -rf /opt/protobuf-src + +#### +# Install shellcheck (shell script lint) +#### +FROM tezbase AS shellcheck +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +RUN curl -sSL \ + https://github.com/koalaman/shellcheck/releases/download/v0.7.1/shellcheck-v0.7.1.linux.x86_64.tar.xz \ + | tar --strip-components 1 --wildcards -xJf - '*/shellcheck' \ + && chmod a+rx shellcheck \ + && mv shellcheck /bin/shellcheck \ + && shasum -a 512 /bin/shellcheck \ + | awk '$1!="aae813283d49f18f95a205dca1c5184267d07534a08abc952ebea1958fee06f8a0207373b6770a083079ba875458ea9da443f2b9910a50dcd93b935048bc14f5" {exit(1)}' + +#### +# Install hadolint (dockerfile lint) +#### +FROM tezbase AS hadolint +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +RUN curl -sSL \ + https://github.com/hadolint/hadolint/releases/download/v1.18.0/hadolint-Linux-x86_64 \ + -o /bin/hadolint \ + && chmod a+rx /bin/hadolint \ + && shasum -a 512 /bin/hadolint \ + | awk '$1!="df27253d374c143a606483b07a26234ac7b4bca40b4eba53e79609c81aa70146e7d5c145f90dcec71d6d1aad1048b7d9d2de68d92284f48a735d04d19c5c5559" {exit(1)}' + +#### +# Install buf (protobuf lint) +#### +FROM tezbase AS buf +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +RUN curl -sSL \ + https://github.com/bufbuild/buf/releases/download/v0.21.0/buf-Linux-x86_64.tar.gz \ + -o buf.tar.gz \ + && shasum -a 256 buf.tar.gz \ + | awk '$1!="95aba62ac0ecc5a9120cc58c65cdcc85038633a816bddfe8398c5ae3b32803f1" {exit(1)}' \ + && tar -xzf buf.tar.gz -C /usr/local --strip-components 1 \ + && rm buf.tar.gz + +######## +# +# +# Content that needs to be installed in order due to packages... +# +# +######## + +FROM tezbase +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +#### +# Install java (first, since we want to dicate what form of Java) +#### + +#### +# OpenJDK 8 +#### +# hadolint ignore=DL3008 +RUN apt-get -q update && apt-get -q install --no-install-recommends -y openjdk-8-jdk-headless \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +#### +# OpenJDK 11 (but keeps default to JDK8) +# NOTE: This default only works when Apache Yetus is launched +# _in_ the container and not outside of it! +#### +# hadolint ignore=DL3008 +RUN apt-get -q update && apt-get -q install --no-install-recommends -y default-jre-headless openjdk-11-jdk-headless \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && update-java-alternatives -s java-1.8.0-openjdk-amd64 || : +# since update alternatives might fail on executables that we don't really need (e.g. 
appletviewer) +# and return with exit code <0 (actually: 2), we can simply do a sanity check if the version is +# as expected for "java" executable after the update and go on +RUN java -version 2>&1 | grep "1.8.0" && rm -f /usr/lib/jvm/default-java \ + && ln -s java-8-openjdk-amd64 /usr/lib/jvm/default-java +ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64 + +###### +# Install findbugs +###### +# hadolint ignore=DL3008 +RUN apt-get -q update && apt-get -q install --no-install-recommends -y findbugs \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* +ENV FINDBUGS_HOME /usr + +###### +# Install maven +###### +# hadolint ignore=DL3008 +RUN apt-get -q update && apt-get -q install --no-install-recommends -y maven \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +###### +# Install python3 and pylint3 +# astroid and pylint go hand-in-hand. Upgrade both at the same time. +###### +# hadolint ignore=DL3008,DL3013 +RUN apt-get -q update && apt-get -q install --no-install-recommends -y \ + python3 \ + python3-bcrypt \ + python3-cffi \ + python3-cryptography \ + python3-dateutil \ + python3-dev \ + python3-dev \ + python3-isort \ + python3-dockerpty \ + python3-nacl \ + python3-pyrsistent \ + python3-setuptools \ + python3-setuptools \ + python3-singledispatch \ + python3-six \ + python3-wheel \ + python3-wrapt \ + python3-yaml \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && curl -sSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py \ + && python3 /tmp/get-pip.py \ + && rm /usr/local/bin/pip /tmp/get-pip.py \ + && pip3 install -v \ + astroid==2.4.2 \ + codespell==2.0 \ + pylint==2.5.3 \ + yamllint==1.24.2 \ + && rm -rf /root/.cache \ + && mv /usr/local/bin/pylint /usr/local/bin/pylint3 +RUN ln -s /usr/local/bin/pylint3 /usr/local/bin/pylint +RUN ln -s /usr/local/bin/pip3 /usr/local/bin/pip + +### +# Install npm and JSHint +### +# hadolint ignore=DL3008 +RUN curl -sSL https://deb.nodesource.com/setup_14.x | bash - \ + && apt-get -q install --no-install-recommends -y nodejs \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && npm install -g \ + jshint@2.12.0 \ + markdownlint-cli@0.23.2 \ + && rm -rf /root/.npm + +##### +# Now all the stuff that was built in parallel +##### + +COPY --from=shellcheck /bin/shellcheck /bin/shellcheck +COPY --from=hadolint /bin/hadolint /bin/hadolint +COPY --from=buf /usr/local/bin/buf /usr/local/bin/buf +COPY --from=protobuf /opt/protobuf /opt/protobuf + +ENV PROTOBUF_HOME /opt/protobuf +ENV PROTOC_PATH /opt/protobuf/bin/protoc +ENV PATH "${PATH}:/opt/protobuf/bin" + +#### +# YETUS CUT HERE +# Magic text above! 
Everything from here on is ignored +# by Yetus, so could include anything not needed +# by your testing environment +### diff --git a/build-tools/test-patch.sh b/build-tools/test-patch.sh index e1fa0fb00d..9b457af221 100755 --- a/build-tools/test-patch.sh +++ b/build-tools/test-patch.sh @@ -38,6 +38,8 @@ GREP=${GREP:-grep} PATCH=${PATCH:-patch} DIFF=${DIFF:-diff} JIRACLI=${JIRA:-jira} +SED=${SED:-sed} +CURL=${CURL:-curl} FINDBUGS_HOME=${FINDBUGS_HOME} ############################################################################### @@ -702,10 +704,24 @@ $comment" echo "======================================================================" echo "" echo "" + + # RESTify the comment + jsoncomment=$(echo "$comment" \ + | ${SED} -e 's,\\,\\\\,g' \ + -e 's,\",\\\",g' \ + -e 's,$,\\r\\n,g' \ + | tr -d '\n') + jsoncomment='{"body":"'"$jsoncomment"'"}' + ### Update Jira with a comment - export USER=hudson - $JIRACLI -s https://issues.apache.org/jira -a addcomment -u tezqa -p $JIRA_PASSWD --comment "$comment" --issue $defect - $JIRACLI -s https://issues.apache.org/jira -a logout -u tezqa -p $JIRA_PASSWD + ${CURL} -X POST \ + -H "Accept: application/json" \ + -H "Content-Type: application/json" \ + -u "tezqa:${JIRA_PASSWD}" \ + -d "$jsoncomment" \ + --silent --location \ + "https://issues.apache.org/jira/rest/api/2/issue/${defect}/comment" \ + >/dev/null fi } diff --git a/docs/pom.xml b/docs/pom.xml index 472975d04d..9517e45770 100644 --- a/docs/pom.xml +++ b/docs/pom.xml @@ -21,13 +21,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> Tez Tez is an application framework which allows for a complex directed-acyclic-graph of tasks for processing data and is built atop Apache Hadoop YARN. - http://tez.apache.org/ + https://tez.apache.org/ 4.0.0 org.apache.tez tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-docs pom @@ -35,14 +35,14 @@ Apache 2 - http://www.apache.org/licenses/LICENSE-2.0.txt + https://www.apache.org/licenses/LICENSE-2.0.txt repo Jira - http://issues.apache.org/jira/browse/TEZ + https://issues.apache.org/jira/browse/TEZ @@ -53,7 +53,7 @@ mailto:user-unsubscribe@tez.apache.org mailto:user@tez.apache.org - http://mail-archives.apache.org/mod_mbox/tez-user/ + https://mail-archives.apache.org/mod_mbox/tez-user/ Development list @@ -62,7 +62,7 @@ mailto:dev-unsubscribe@tez.apache.org mailto:dev@tez.apache.org - http://mail-archives.apache.org/mod_mbox/tez-dev/ + https://mail-archives.apache.org/mod_mbox/tez-dev/ Commit list @@ -71,7 +71,7 @@ mailto:commits-unsubscribe@tez.apache.org mailto:commits@tez.apache.org - http://mail-archives.apache.org/mod_mbox/tez-commits/ + https://mail-archives.apache.org/mod_mbox/tez-commits/ JIRA Issues list @@ -80,7 +80,7 @@ mailto:issues-unsubscribe@tez.apache.org mailto:issues@tez.apache.org - http://mail-archives.apache.org/mod_mbox/tez-issues/ + https://mail-archives.apache.org/mod_mbox/tez-issues/ @@ -238,6 +238,15 @@ PMC + + abstractdog + Laszlo Bodor + abstractdog@apache.org + +1 + + Committer + + kamrul Mohammad Kamrul Islam @@ -431,7 +440,7 @@ Apache Software Foundation - http://www.apache.org/ + https://www.apache.org/ @@ -458,6 +467,7 @@ org.apache.maven.plugins maven-site-plugin + ${maven-site-plugin.version} ./target @@ -478,6 +488,7 @@ org.apache.maven.plugins maven-project-info-reports-plugin + ${maven-project-info-reports-plugin.version} ${project.basedir}/src/site/custom/project-info-report.properties false @@ -485,10 +496,10 @@ - project-team - mailing-list - issue-tracking - license + team + 
mailing-lists + issue-management + licenses diff --git a/docs/src/site/markdown/by-laws.md b/docs/src/site/markdown/by-laws.md index 28ecabb852..4ea47d544b 100644 --- a/docs/src/site/markdown/by-laws.md +++ b/docs/src/site/markdown/by-laws.md @@ -42,7 +42,8 @@ All of the volunteers who are contributing time, code, documentation, or resourc The project's Committers are responsible for the project's technical management. All committers have write access to the project's source repositories. Committers may cast binding votes on any technical discussion regarding the project. -Committer access is by invitation only and must be approved by lazy consensus of the active PMC members. A Committer may request removal of their commit privileges by their own declaration. A committer will be considered "emeritus/inactive" by not contributing in any form to the project for over 1 year. An emeritus committer may request reinstatement of commit access from the PMC. Such reinstatement is subject to lazy consensus of active PMC members. + +Committer access is by invitation only and must be approved by lazy consensus of the active PMC members. A Committer may request removal of their commit privileges by their own declaration. Commit access can be revoked by a unanimous vote of all the active PMC members (except the committer in question if they are also a PMC member). @@ -61,7 +62,7 @@ The Project Management Committee (PMC) for Apache Tez was created by a resolutio - Nominating new PMC members and committers - Maintaining these bylaws and other guidelines of the project -Membership of the PMC is by invitation only and must be approved by a lazy consensus of active PMC members. A PMC member is considered "emeritus/inactive" by not contributing in any form to the project for over one year. An emeritus PMC member may request reinstatement to the PMC. Such reinstatement is subject to lazy consensus of active PMC members. A PMC member may resign their membership from the PMC by their own declaration. Membership of the PMC can be revoked by an unanimous vote of all the active PMC members other than the member in question. +Membership of the PMC is by invitation only and must be approved by a lazy consensus of active PMC members. A PMC member may resign their membership from the PMC by their own declaration. Membership of the PMC can also be revoked via a Board resolution. The chair of the PMC is appointed by the ASF board. The chair is an office holder of the Apache Software Foundation (Vice President, Apache Tez) and has primary responsibility to the board for the management of the projects within the scope of the Tez PMC. The chair reports to the board quarterly on developments within the Tez project. The PMC may consider the position of PMC chair annually, and if supported by a successful vote to change the PMC chair, may recommend a new chair to the board. Ultimately, however, it is the board's responsibility who it chooses to appoint as the PMC chair. @@ -123,9 +124,9 @@ This section describes the various actions which are undertaken within the proje Votes are open for a period of a minimum of 3 days (excluding weekend days) to allow all active voters time to consider the vote. For any votes requiring full consensus or a 2/3 majority, the vote should remain open for a minimum of 1 week. Votes relating to code changes are not subject to a strict timetable but should be made as timely as possible. 
-[Apache Software Foundation]: http://www.apache.org/foundation/ -[Incubator project]: http://incubator.apache.org/ -[Foundation FAQ]: http://www.apache.org/foundation/faq.html -[Committer FAQ]: http://www.apache.org/dev/committers.html -[CLA]: http://www.apache.org/licenses/icla.txt -[set of roles]: http://www.apache.org/foundation/how-it-works.html#roles +[Apache Software Foundation]: https://www.apache.org/foundation/ +[Incubator project]: https://incubator.apache.org/ +[Foundation FAQ]: https://www.apache.org/foundation/faq.html +[Committer FAQ]: https://www.apache.org/dev/committers.html +[CLA]: https://www.apache.org/licenses/icla.txt +[set of roles]: https://www.apache.org/foundation/how-it-works.html#roles diff --git a/docs/src/site/markdown/index.md b/docs/src/site/markdown/index.md index d0f307a4b4..fc47f533a1 100644 --- a/docs/src/site/markdown/index.md +++ b/docs/src/site/markdown/index.md @@ -15,15 +15,15 @@ limitations under the License. --> -Welcome to Apache Tez™ +Welcome to Apache TEZ® Introduction ------------ -The Apache Tez™ project is aimed at building an application framework +The Apache TEZ® project is aimed at building an application framework which allows for a complex directed-acyclic-graph of tasks for processing data. It is currently built atop -[Apache Hadoop YARN](http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html). +[Apache Hadoop YARN](https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html). The 2 main design themes for Tez are: diff --git a/docs/src/site/markdown/install.md b/docs/src/site/markdown/install.md index b1cdcf9583..44ca862a82 100644 --- a/docs/src/site/markdown/install.md +++ b/docs/src/site/markdown/install.md @@ -34,11 +34,11 @@ or higher. 2. Build tez using `mvn clean package -DskipTests=true -Dmaven.javadoc.skip=true` - This assumes that you have already installed JDK8 or later and Maven 3 or later. - - Tez also requires Protocol Buffers 2.5.0, including the protoc-compiler. + - Tez also requires Protocol Buffers 3.19.4, including the protoc-compiler. * This can be downloaded from https://github.com/google/protobuf/tags/. * On Mac OS X with the homebrew package manager `brew install protobuf250` - * For rpm-based linux systems, the yum repos may not have the 2.5.0 version. - `rpm.pbone.net` has the protobuf-2.5.0 and protobuf-compiler-2.5.0 packages. + * For rpm-based linux systems, the yum repos may not have the 3.19.4 version. + `rpm.pbone.net` has the protobuf-3.19.4 and protobuf-compiler-3.19.4 packages. - If you prefer to run the unit tests, remove skipTests from the command above. - If you use Eclipse IDE, you can import the projects using diff --git a/docs/src/site/markdown/install_pre_0_5_0.md b/docs/src/site/markdown/install_pre_0_5_0.md index 494ff54270..5123c14014 100644 --- a/docs/src/site/markdown/install_pre_0_5_0.md +++ b/docs/src/site/markdown/install_pre_0_5_0.md @@ -20,7 +20,7 @@ [Install instructions for Tez (post 0.5.0)](./install.html) ----------------------------------------------------------------------------------- -Install/Deploy Instructions for Tez release pre-0.5.0 E.g. [(Tez-0.4.1)](http://archive.apache.org/dist/incubator/tez/tez-0.4.1-incubating/) +Install/Deploy Instructions for Tez release pre-0.5.0 E.g. [(Tez-0.4.1)](https://archive.apache.org/dist/incubator/tez/tez-0.4.1-incubating/) -------------------------------------------------------------------------------------------------------------------------------------------------- 1. 
Deploy Apache Hadoop using either the 2.2.0 release or a compatible diff --git a/docs/src/site/markdown/privacy-policy.md b/docs/src/site/markdown/privacy-policy.md deleted file mode 100644 index 95825d0604..0000000000 --- a/docs/src/site/markdown/privacy-policy.md +++ /dev/null @@ -1,52 +0,0 @@ - - -Privacy Policy - -Privacy Policy --------------- - -Information about your use of this website is collected using server -access logs and a tracking cookie. The collected information consists of -the following: - -1. The IP address from which you access the website; - -2. The type of browser and operating system you use to access our site; - -3. The date and time you access our site; - -4. The pages you visit; and - -5. The addresses of pages from where you followed a link to our site. - -Part of this information is gathered using a tracking cookie set by the -[Google Analytics](http://www.google.com/analytics/) service and handled -by Google as described in their [privacy -policy](http://www.google.com/privacy.html). See your browser -documentation for instructions on how to disable the cookie if you -prefer not to share this data with Google. - -We use the gathered information to help us make our site more useful to -visitors and to better understand how and when our site is used. We do -not track or collect personally identifiable information or associate -gathered data with any personally identifying information from other -sources. - -By using this website, you consent to the collection of this data in the -manner and for the purpose described above. - diff --git a/docs/src/site/markdown/releases/apache-tez-0-10-0.md b/docs/src/site/markdown/releases/apache-tez-0-10-0.md new file mode 100644 index 0000000000..0ba6f7e243 --- /dev/null +++ b/docs/src/site/markdown/releases/apache-tez-0-10-0.md @@ -0,0 +1,30 @@ + + +Apache TEZ® 0.10.0 + +Apache TEZ® 0.10.0 +---------------------- + +- [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.10.0/) +- [Release Notes](0.10.0/release-notes.txt) +- Documentation + - [API Javadocs](0.10.0/tez-api-javadocs/index.html) : Documentation for the Tez APIs + - [Runtime Library Javadocs](0.10.0/tez-runtime-library-javadocs/index.html) : Documentation for built-in implementations of useful Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Mapreduce Javadocs](0.10.0/tez-mapreduce-javadocs/index.html) : Documentation for built-in implementations of Mapreduce compatible Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Configuration](0.10.0/tez-api-javadocs/configs/TezConfiguration.html) : Documentation for configurations of Tez. These configurations are typically specified in tez-site.xml. + - [Tez Runtime Configuration](0.10.0/tez-runtime-library-javadocs/configs/TezRuntimeConfiguration.html) : Documentation for runtime configurations of Tez. These configurations are typically specified by job submitters. 
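The release page above distinguishes settings read from `tez-site.xml` into `TezConfiguration` from per-job runtime settings. As an illustrative sketch that is not part of this change set: a client typically lets `TezConfiguration` pick up `tez-site.xml` from the classpath and then overrides individual keys in code before starting a `TezClient`. The `tez.lib.uris` path and client name below are hypothetical.

```java
import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.TezConfiguration;

public class TezClientSketch {
  public static void main(String[] args) throws Exception {
    // TezConfiguration loads tez-site.xml as a default resource if present.
    TezConfiguration tezConf = new TezConfiguration();
    // Hypothetical location of the Tez tarball on HDFS.
    tezConf.set(TezConfiguration.TEZ_LIB_URIS, "hdfs:///apps/tez/tez.tar.gz");
    // Run the AM in session mode so several DAGs can share it.
    tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true);

    TezClient client = TezClient.create("example-client", tezConf);
    client.start();
    try {
      // ... build and submit DAGs here ...
    } finally {
      client.stop();
    }
  }
}
```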
diff --git a/docs/src/site/markdown/releases/apache-tez-0-10-1.md b/docs/src/site/markdown/releases/apache-tez-0-10-1.md new file mode 100644 index 0000000000..efe346aae0 --- /dev/null +++ b/docs/src/site/markdown/releases/apache-tez-0-10-1.md @@ -0,0 +1,30 @@ + + +Apache TEZ® 0.10.1 + +Apache TEZ® 0.10.1 +---------------------- + +- [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.10.1/) +- [Release Notes](0.10.1/release-notes.txt) +- Documentation + - [API Javadocs](0.10.1/tez-api-javadocs/index.html) : Documentation for the Tez APIs + - [Runtime Library Javadocs](0.10.1/tez-runtime-library-javadocs/index.html) : Documentation for built-in implementations of useful Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Mapreduce Javadocs](0.10.1/tez-mapreduce-javadocs/index.html) : Documentation for built-in implementations of Mapreduce compatible Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Configuration](0.10.1/tez-api-javadocs/configs/TezConfiguration.html) : Documentation for configurations of Tez. These configurations are typically specified in tez-site.xml. + - [Tez Runtime Configuration](0.10.1/tez-runtime-library-javadocs/configs/TezRuntimeConfiguration.html) : Documentation for runtime configurations of Tez. These configurations are typically specified by job submitters. diff --git a/docs/src/site/markdown/releases/apache-tez-0-10-2.md b/docs/src/site/markdown/releases/apache-tez-0-10-2.md new file mode 100644 index 0000000000..2e3cc49c26 --- /dev/null +++ b/docs/src/site/markdown/releases/apache-tez-0-10-2.md @@ -0,0 +1,30 @@ + + +Apache TEZ® 0.10.2 + +Apache TEZ® 0.10.2 +---------------------- + +- [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.10.2/) +- [Release Notes](0.10.2/release-notes.txt) +- Documentation + - [API Javadocs](0.10.2/tez-api-javadocs/index.html) : Documentation for the Tez APIs + - [Runtime Library Javadocs](0.10.2/tez-runtime-library-javadocs/index.html) : Documentation for built-in implementations of useful Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Mapreduce Javadocs](0.10.2/tez-mapreduce-javadocs/index.html) : Documentation for built-in implementations of Mapreduce compatible Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Configuration](0.10.2/tez-api-javadocs/configs/TezConfiguration.html) : Documentation for configurations of Tez. These configurations are typically specified in tez-site.xml. + - [Tez Runtime Configuration](0.10.2/tez-runtime-library-javadocs/configs/TezRuntimeConfiguration.html) : Documentation for runtime configurations of Tez. These configurations are typically specified by job submitters. diff --git a/docs/src/site/markdown/releases/apache-tez-0-10-3.md b/docs/src/site/markdown/releases/apache-tez-0-10-3.md new file mode 100644 index 0000000000..3e86896861 --- /dev/null +++ b/docs/src/site/markdown/releases/apache-tez-0-10-3.md @@ -0,0 +1,30 @@ + + +Apache TEZ® 0.10.3 + +Apache TEZ® 0.10.3 +---------------------- + +- [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.10.3/) +- [Release Notes](0.10.3/release-notes.txt) +- Documentation + - [API Javadocs](0.10.3/tez-api-javadocs/index.html) : Documentation for the Tez APIs + - [Runtime Library Javadocs](0.10.3/tez-runtime-library-javadocs/index.html) : Documentation for built-in implementations of useful Inputs, Outputs, Processors etc. 
written based on the Tez APIs + - [Tez Mapreduce Javadocs](0.10.3/tez-mapreduce-javadocs/index.html) : Documentation for built-in implementations of Mapreduce compatible Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Configuration](0.10.3/tez-api-javadocs/configs/TezConfiguration.html) : Documentation for configurations of Tez. These configurations are typically specified in tez-site.xml. + - [Tez Runtime Configuration](0.10.3/tez-runtime-library-javadocs/configs/TezRuntimeConfiguration.html) : Documentation for runtime configurations of Tez. These configurations are typically specified by job submitters. diff --git a/docs/src/site/markdown/releases/apache-tez-0-10-4.md b/docs/src/site/markdown/releases/apache-tez-0-10-4.md new file mode 100644 index 0000000000..7028172664 --- /dev/null +++ b/docs/src/site/markdown/releases/apache-tez-0-10-4.md @@ -0,0 +1,30 @@ + + +Apache TEZ® 0.10.4 + +Apache TEZ® 0.10.4 +---------------------- + +- [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.10.4/) +- [Release Notes](0.10.4/release-notes.txt) +- Documentation + - [API Javadocs](0.10.4/tez-api-javadocs/index.html) : Documentation for the Tez APIs + - [Runtime Library Javadocs](0.10.4/tez-runtime-library-javadocs/index.html) : Documentation for built-in implementations of useful Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Mapreduce Javadocs](0.10.4/tez-mapreduce-javadocs/index.html) : Documentation for built-in implementations of Mapreduce compatible Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Configuration](0.10.4/tez-api-javadocs/configs/TezConfiguration.html) : Documentation for configurations of Tez. These configurations are typically specified in tez-site.xml. + - [Tez Runtime Configuration](0.10.4/tez-runtime-library-javadocs/configs/TezRuntimeConfiguration.html) : Documentation for runtime configurations of Tez. These configurations are typically specified by job submitters. diff --git a/docs/src/site/markdown/releases/apache-tez-0-5-0.md b/docs/src/site/markdown/releases/apache-tez-0-5-0.md index f206ea6f7f..c8cd2e696f 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-5-0.md +++ b/docs/src/site/markdown/releases/apache-tez-0-5-0.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.5.0 +Apache TEZ® 0.5.0 -Apache Tez™ 0.5.0 +Apache TEZ® 0.5.0 ---------------- - [Download Release Artifacts](http://archive.apache.org/dist/tez/0.5.0/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-5-1.md b/docs/src/site/markdown/releases/apache-tez-0-5-1.md index 01417ba347..12c3c001ab 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-5-1.md +++ b/docs/src/site/markdown/releases/apache-tez-0-5-1.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.5.1 +Apache TEZ® 0.5.1 -Apache Tez™ 0.5.1 +Apache TEZ® 0.5.1 ---------------- - [Download Release Artifacts](http://archive.apache.org/dist/tez/0.5.1/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-5-2.md b/docs/src/site/markdown/releases/apache-tez-0-5-2.md index 99ca44b759..6acb130e47 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-5-2.md +++ b/docs/src/site/markdown/releases/apache-tez-0-5-2.md @@ -15,9 +15,9 @@ limitations under the License. 
--> -Apache Tez™ 0.5.2 +Apache TEZ® 0.5.2 -Apache Tez™ 0.5.2 +Apache TEZ® 0.5.2 ---------------- - [Download Release Artifacts](http://archive.apache.org/dist/tez/0.5.2/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-5-3.md b/docs/src/site/markdown/releases/apache-tez-0-5-3.md index 7e00fe9092..750fe30536 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-5-3.md +++ b/docs/src/site/markdown/releases/apache-tez-0-5-3.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.5.3 +Apache TEZ® 0.5.3 -Apache Tez™ 0.5.3 +Apache TEZ® 0.5.3 ---------------- - [Download Release Artifacts](http://archive.apache.org/dist/tez/0.5.3/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-5-4.md b/docs/src/site/markdown/releases/apache-tez-0-5-4.md index 9d3f96c03b..54c5d0501d 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-5-4.md +++ b/docs/src/site/markdown/releases/apache-tez-0-5-4.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.5.4 +Apache TEZ® 0.5.4 -Apache Tez™ 0.5.4 +Apache TEZ® 0.5.4 ---------------- - [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.5.4/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-6-0.md b/docs/src/site/markdown/releases/apache-tez-0-6-0.md index 473d03b807..96048bcb28 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-6-0.md +++ b/docs/src/site/markdown/releases/apache-tez-0-6-0.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.6.0 +Apache TEZ® 0.6.0 -Apache Tez™ 0.6.0 +Apache TEZ® 0.6.0 ---------------- - [Download Release Artifacts](http://archive.apache.org/dist/tez/0.6.0/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-6-1.md b/docs/src/site/markdown/releases/apache-tez-0-6-1.md index 12854778b2..17b7d647e5 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-6-1.md +++ b/docs/src/site/markdown/releases/apache-tez-0-6-1.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.6.1 +Apache TEZ® 0.6.1 -Apache Tez™ 0.6.1 +Apache TEZ® 0.6.1 ---------------- - [Download Release Artifacts](http://archive.apache.org/dist/tez/tez/0.6.1/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-6-2.md b/docs/src/site/markdown/releases/apache-tez-0-6-2.md index 5a898e6c96..b1f97572b2 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-6-2.md +++ b/docs/src/site/markdown/releases/apache-tez-0-6-2.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.6.2 +Apache TEZ® 0.6.2 -Apache Tez™ 0.6.2 +Apache TEZ® 0.6.2 ---------------- - [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.6.2/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-7-0.md b/docs/src/site/markdown/releases/apache-tez-0-7-0.md index 8c0f956c60..fd8b48d646 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-7-0.md +++ b/docs/src/site/markdown/releases/apache-tez-0-7-0.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.7.0 +Apache TEZ® 0.7.0 -Apache Tez™ 0.7.0 +Apache TEZ® 0.7.0 ---------------- - [Download Release Artifacts](http://archive.apache.org/dist/tez/0.7.0/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-7-1.md b/docs/src/site/markdown/releases/apache-tez-0-7-1.md index 1c1766f241..152bd47053 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-7-1.md +++ b/docs/src/site/markdown/releases/apache-tez-0-7-1.md @@ -15,9 +15,9 @@ limitations under the License. 
--> -Apache Tez™ 0.7.1 +Apache TEZ® 0.7.1 -Apache Tez™ 0.7.1 +Apache TEZ® 0.7.1 ---------------- - [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.7.1/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-8-0-alpha.md b/docs/src/site/markdown/releases/apache-tez-0-8-0-alpha.md index 5854ecd74f..2a1d4d18ec 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-8-0-alpha.md +++ b/docs/src/site/markdown/releases/apache-tez-0-8-0-alpha.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.8.0-alpha +Apache TEZ® 0.8.0-alpha -Apache Tez™ 0.8.0-alpha +Apache TEZ® 0.8.0-alpha ---------------------- - [Download Release Artifacts](http://archive.apache.org/dist/tez/0.8.0-alpha/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-8-1-alpha.md b/docs/src/site/markdown/releases/apache-tez-0-8-1-alpha.md index 806292beb9..c8db8ed56b 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-8-1-alpha.md +++ b/docs/src/site/markdown/releases/apache-tez-0-8-1-alpha.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.8.1-alpha +Apache TEZ® 0.8.1-alpha -Apache Tez™ 0.8.1-alpha +Apache TEZ® 0.8.1-alpha ---------------------- - [Download Release Artifacts](http://archive.apache.org/dist/tez/0.8.1-alpha/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-8-2.md b/docs/src/site/markdown/releases/apache-tez-0-8-2.md index 8c25155323..f7282ff1b6 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-8-2.md +++ b/docs/src/site/markdown/releases/apache-tez-0-8-2.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.8.2 +Apache TEZ® 0.8.2 -Apache Tez™ 0.8.2 +Apache TEZ® 0.8.2 ---------------------- - [Download Release Artifacts](http://archive.apache.org/dist/tez/0.8.2/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-8-3.md b/docs/src/site/markdown/releases/apache-tez-0-8-3.md index 5318652b6f..ca871779ba 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-8-3.md +++ b/docs/src/site/markdown/releases/apache-tez-0-8-3.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.8.3 +Apache TEZ® 0.8.3 -Apache Tez™ 0.8.3 +Apache TEZ® 0.8.3 ---------------------- - [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.8.3/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-8-4.md b/docs/src/site/markdown/releases/apache-tez-0-8-4.md index 2250dbee4c..802db939ab 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-8-4.md +++ b/docs/src/site/markdown/releases/apache-tez-0-8-4.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.8.4 +Apache TEZ® 0.8.4 -Apache Tez™ 0.8.4 +Apache TEZ® 0.8.4 ---------------------- - [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.8.4/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-8-5.md b/docs/src/site/markdown/releases/apache-tez-0-8-5.md index 9b73cec00a..cebc17a405 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-8-5.md +++ b/docs/src/site/markdown/releases/apache-tez-0-8-5.md @@ -15,9 +15,9 @@ limitations under the License. 
--> -Apache Tez™ 0.8.5 +Apache TEZ® 0.8.5 -Apache Tez™ 0.8.5 +Apache TEZ® 0.8.5 ---------------------- - [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.8.5/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-9-0.md b/docs/src/site/markdown/releases/apache-tez-0-9-0.md index 5059790dd8..12095ac2b6 100644 --- a/docs/src/site/markdown/releases/apache-tez-0-9-0.md +++ b/docs/src/site/markdown/releases/apache-tez-0-9-0.md @@ -15,9 +15,9 @@ limitations under the License. --> -Apache Tez™ 0.9.0 +Apache TEZ® 0.9.0 -Apache Tez™ 0.9.0 +Apache TEZ® 0.9.0 ---------------------- - [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.9.0/) diff --git a/docs/src/site/markdown/releases/apache-tez-0-9-1.md b/docs/src/site/markdown/releases/apache-tez-0-9-1.md new file mode 100644 index 0000000000..452bce5f5d --- /dev/null +++ b/docs/src/site/markdown/releases/apache-tez-0-9-1.md @@ -0,0 +1,30 @@ + + +Apache TEZ® 0.9.1 + +Apache TEZ® 0.9.1 +---------------------- + +- [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.9.1/) +- [Release Notes](0.9.1/release-notes.txt) +- Documentation + - [API Javadocs](0.9.1/tez-api-javadocs/index.html) : Documentation for the Tez APIs + - [Runtime Library Javadocs](0.9.1/tez-runtime-library-javadocs/index.html) : Documentation for built-in implementations of useful Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Mapreduce Javadocs](0.9.1/tez-mapreduce-javadocs/index.html) : Documentation for built-in implementations of Mapreduce compatible Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Configuration](0.9.1/tez-api-javadocs/configs/TezConfiguration.html) : Documentation for configurations of Tez. These configurations are typically specified in tez-site.xml. + - [Tez Runtime Configuration](0.9.1/tez-runtime-library-javadocs/configs/TezRuntimeConfiguration.html) : Documentation for runtime configurations of Tez. These configurations are typically specified by job submitters. diff --git a/docs/src/site/markdown/releases/apache-tez-0-9-2.md b/docs/src/site/markdown/releases/apache-tez-0-9-2.md new file mode 100644 index 0000000000..d29cd6d4e0 --- /dev/null +++ b/docs/src/site/markdown/releases/apache-tez-0-9-2.md @@ -0,0 +1,30 @@ + + +Apache TEZ® 0.9.2 + +Apache TEZ® 0.9.2 +---------------------- + +- [Download Release Artifacts](http://www.apache.org/dyn/closer.lua/tez/0.9.2/) +- [Release Notes](0.9.2/release-notes.txt) +- Documentation + - [API Javadocs](0.9.2/tez-api-javadocs/index.html) : Documentation for the Tez APIs + - [Runtime Library Javadocs](0.9.2/tez-runtime-library-javadocs/index.html) : Documentation for built-in implementations of useful Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Mapreduce Javadocs](0.9.2/tez-mapreduce-javadocs/index.html) : Documentation for built-in implementations of Mapreduce compatible Inputs, Outputs, Processors etc. written based on the Tez APIs + - [Tez Configuration](0.9.2/tez-api-javadocs/configs/TezConfiguration.html) : Documentation for configurations of Tez. These configurations are typically specified in tez-site.xml. + - [Tez Runtime Configuration](0.9.2/tez-runtime-library-javadocs/configs/TezRuntimeConfiguration.html) : Documentation for runtime configurations of Tez. These configurations are typically specified by job submitters. 
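The `TezRuntimeConfiguration` entries called out on each release page are the per-job knobs: with the runtime library they are usually attached to individual edges through the config builders rather than set cluster-wide. A minimal sketch, assuming the ordered-partitioned shuffle edge from `tez-runtime-library`; the sort-buffer size and compression flag are illustrative values only, not recommendations.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
import org.apache.tez.dag.api.EdgeProperty;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig;

public class RuntimeConfigSketch {
  // Builds an edge property carrying per-job runtime settings for a
  // sorted, partitioned shuffle between two vertices.
  public static EdgeProperty shuffleEdge() {
    Configuration conf = new Configuration(false);
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 256);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, true);
    return OrderedPartitionedKVEdgeConfig
        .newBuilder(Text.class.getName(), IntWritable.class.getName(),
            HashPartitioner.class.getName())
        .setFromConfiguration(conf)
        .build()
        .createDefaultEdgeProperty();
  }
}
```

The builder pattern keeps submitter-supplied runtime settings scoped to a single edge instead of leaking into the cluster-wide configuration.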
diff --git a/docs/src/site/markdown/releases/index.md b/docs/src/site/markdown/releases/index.md index 9e9df225ff..9be6f8bf12 100644 --- a/docs/src/site/markdown/releases/index.md +++ b/docs/src/site/markdown/releases/index.md @@ -15,24 +15,31 @@ limitations under the License. --> -Apache Tez™ Releases +Apache TEZ® Releases Releases ------------ -- [Apache Tez™ 0.9.0](./apache-tez-0-9-0.html) (Jul 27, 2017) -- [Apache Tez™ 0.8.5](./apache-tez-0-8-5.html) (Mar 13, 2017) -- [Apache Tez™ 0.8.4](./apache-tez-0-8-4.html) (Jul 08, 2016) -- [Apache Tez™ 0.8.3](./apache-tez-0-8-3.html) (Apr 15, 2016) -- [Apache Tez™ 0.8.2](./apache-tez-0-8-2.html) (Jan 19, 2016) -- [Apache Tez™ 0.8.1-alpha](./apache-tez-0-8-1-alpha.html) (Oct 12, 2015) -- [Apache Tez™ 0.8.0-alpha](./apache-tez-0-8-0-alpha.html) (Sep 01, 2015) -- [Apache Tez™ 0.7.1](./apache-tez-0-7-1.html) (May 10, 2016) -- [Apache Tez™ 0.7.0](./apache-tez-0-7-0.html) (May 18, 2015) -- [Apache Tez™ 0.6.2](./apache-tez-0-6-2.html) (Aug 07, 2015) -- [Apache Tez™ 0.6.1](./apache-tez-0-6-1.html) (May 18, 2015) -- [Apache Tez™ 0.6.0](./apache-tez-0-6-0.html) (Jan 23, 2015) -- [Apache Tez™ 0.5.4](./apache-tez-0-5-4.html) (Jun 26, 2015) -- [Apache Tez™ 0.5.3](./apache-tez-0-5-3.html) (Dec 10, 2014) -- [Apache Tez™ 0.5.2](./apache-tez-0-5-2.html) (Nov 07, 2014) -- [Apache Tez™ 0.5.1](./apache-tez-0-5-1.html) (Oct 08, 2014) -- [Apache Tez™ 0.5.0](./apache-tez-0-5-0.html) (Sep 04, 2014) +- [Apache TEZ® 0.10.4](./apache-tez-0-10-4.html) (Sep 15, 2024) +- [Apache TEZ® 0.10.3](./apache-tez-0-10-3.html) (Jan 31, 2024) +- [Apache TEZ® 0.10.2](./apache-tez-0-10-2.html) (Jul 30, 2022) +- [Apache TEZ® 0.10.1](./apache-tez-0-10-1.html) (Jul 01, 2021) +- [Apache TEZ® 0.10.0](./apache-tez-0-10-0.html) (Oct 15, 2020) +- [Apache TEZ® 0.9.2](./apache-tez-0-9-2.html) (Mar 29, 2019) +- [Apache TEZ® 0.9.1](./apache-tez-0-9-1.html) (Jan 04, 2018) +- [Apache TEZ® 0.9.0](./apache-tez-0-9-0.html) (Jul 27, 2017) +- [Apache TEZ® 0.8.5](./apache-tez-0-8-5.html) (Mar 13, 2017) +- [Apache TEZ® 0.8.4](./apache-tez-0-8-4.html) (Jul 08, 2016) +- [Apache TEZ® 0.8.3](./apache-tez-0-8-3.html) (Apr 15, 2016) +- [Apache TEZ® 0.8.2](./apache-tez-0-8-2.html) (Jan 19, 2016) +- [Apache TEZ® 0.8.1-alpha](./apache-tez-0-8-1-alpha.html) (Oct 12, 2015) +- [Apache TEZ® 0.8.0-alpha](./apache-tez-0-8-0-alpha.html) (Sep 01, 2015) +- [Apache TEZ® 0.7.1](./apache-tez-0-7-1.html) (May 10, 2016) +- [Apache TEZ® 0.7.0](./apache-tez-0-7-0.html) (May 18, 2015) +- [Apache TEZ® 0.6.2](./apache-tez-0-6-2.html) (Aug 07, 2015) +- [Apache TEZ® 0.6.1](./apache-tez-0-6-1.html) (May 18, 2015) +- [Apache TEZ® 0.6.0](./apache-tez-0-6-0.html) (Jan 23, 2015) +- [Apache TEZ® 0.5.4](./apache-tez-0-5-4.html) (Jun 26, 2015) +- [Apache TEZ® 0.5.3](./apache-tez-0-5-3.html) (Dec 10, 2014) +- [Apache TEZ® 0.5.2](./apache-tez-0-5-2.html) (Nov 07, 2014) +- [Apache TEZ® 0.5.1](./apache-tez-0-5-1.html) (Oct 08, 2014) +- [Apache TEZ® 0.5.0](./apache-tez-0-5-0.html) (Sep 04, 2014) diff --git a/docs/src/site/markdown/talks.md b/docs/src/site/markdown/talks.md index a29d553dfc..fe017bd048 100644 --- a/docs/src/site/markdown/talks.md +++ b/docs/src/site/markdown/talks.md @@ -21,12 +21,5 @@ Talks ----- - Apache Tez : Accelerating Hadoop Query Processing by Bikas Saha and Hitesh Shah at [Hadoop Summit 2014, San Jose, CA, USA](http://hadoopsummit.org/san-jose/) - - [Slides](http://www.slideshare.net/Hadoop_Summit/w-1205phall1saha) - - [Video](http://www.youtube.com/watch?v=yf_hBiZy3nk) - -User Meetup Recordings ----------------------- - -- 
[Recording](https://hortonworks.webex.com/hortonworks/ldr.php?AT=pb&SP=MC&rID=125516477&rKey=d147a3c924b64496) - from [Meetup on July 31st, 2013](http://www.meetup.com/Apache-Tez-User-Group/events/130852782/) - at [Hortonworks Inc](http://hortonworks.com) + - [Slides](https://www.slideshare.net/Hadoop_Summit/w-1205phall1saha) + - [Video](https://www.youtube.com/watch?v=yf_hBiZy3nk) diff --git a/docs/src/site/markdown/tez-ui.md b/docs/src/site/markdown/tez-ui.md index 3855fbbb97..423018fdf5 100644 --- a/docs/src/site/markdown/tez-ui.md +++ b/docs/src/site/markdown/tez-ui.md @@ -128,7 +128,7 @@ yarn-site.xml ... ``` -__For more detailed information (setup, configuration, deployment), please refer to the [Apache Hadoop Documentation on the Application Timeline Server](http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/TimelineServer.html)__ +__For more detailed information (setup, configuration, deployment), please refer to the [Apache Hadoop Documentation on the Application Timeline Server](https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/TimelineServer.html)__ __For general information on the compatibility matrix of the Tez UI with YARN TimelineServer, please refer to the [Tez - Timeline Server Guide](tez_yarn_timeline.html)__ diff --git a/docs/src/site/markdown/tez_acls.md b/docs/src/site/markdown/tez_acls.md index 52d9661b32..8a488e00c0 100644 --- a/docs/src/site/markdown/tez_acls.md +++ b/docs/src/site/markdown/tez_acls.md @@ -21,16 +21,16 @@ Access control in Tez can be categorized as follows: - - Modify permissions on the Tez AM ( or Session ). Users with this permision can: + - Modify permissions on the Tez AM ( or Session ). Users with this permission can: - Submit a DAG to a Tez Session - Kill any DAG within the given AM/Session - Kill the Session - - View permissions on the Tez AM ( or Session ). Users with this permision can: + - View permissions on the Tez AM ( or Session ). Users with this permission can: - Monitor/View the status of the Session - Monitor/View the progress/status of any DAG within the given AM/Session - - Modify permissions on a particular Tez DAG. Users with this permision can: + - Modify permissions on a particular Tez DAG. Users with this permission can: - Kill the DAG - - View permissions on a particular Tez DAG. Users with this permision can: + - View permissions on a particular Tez DAG. Users with this permission can: - Monitor/View the progress/status of the DAG From the above, you can see that All users/groups that have access to do operations on the AM also have access to similar operations on all DAGs within that AM/session. Also, by default, the owner of the Tez AM, i.e. the user who started the Tez AM, is considered a super-user and has access to all operations on the AM as well as all DAGs within the AM/Session. diff --git a/docs/src/site/markdown/tez_yarn_timeline.md b/docs/src/site/markdown/tez_yarn_timeline.md index 745f65c044..cbe28b4966 100644 --- a/docs/src/site/markdown/tez_yarn_timeline.md +++ b/docs/src/site/markdown/tez_yarn_timeline.md @@ -18,7 +18,7 @@ ## YARN Timeline Background -Initial support for [YARN Timeline](http://hadoop.apache.org/docs/r2.4.0/hadoop-yarn/hadoop-yarn-site/TimelineServer.html) was introduced in Apache Hadoop 2.4.0. Support for ACLs in Timeline was introduced in Apache Hadoop 2.6.0. Support for Timeline was introduced in Tez in 0.5.x ( with some experimental support in 0.4.x ). However, Tez ACLs integration with Timeline is only available from Tez 0.6.0 onwards. 
+Initial support for [YARN Timeline](https://hadoop.apache.org/docs/r2.4.0/hadoop-yarn/hadoop-yarn-site/TimelineServer.html) was introduced in Apache Hadoop 2.4.0. Support for ACLs in Timeline was introduced in Apache Hadoop 2.6.0. Support for Timeline was introduced in Tez in 0.5.x ( with some experimental support in 0.4.x ). However, Tez ACLs integration with Timeline is only available from Tez 0.6.0 onwards. ## How Tez Uses YARN Timeline diff --git a/docs/src/site/site.xml b/docs/src/site/site.xml index 91d15a911f..b69602f561 100644 --- a/docs/src/site/site.xml +++ b/docs/src/site/site.xml @@ -22,7 +22,7 @@ org.apache.maven.skins maven-fluido-skin - 1.3.0 + 1.9 @@ -42,13 +42,13 @@ ./images/ApacheTezLogo_lowres.png 25% 25% - http://tez.apache.org/ + https://tez.apache.org/ Apache Software Foundation - http://www.apache.org/images/asf_logo.gif - http://www.apache.org/ + https://apache.org/foundation/press/kit/asf_logo_wide.png + https://www.apache.org/ @@ -56,24 +56,23 @@ - - + ]]> - + - + @@ -116,43 +115,43 @@ - + - - - - - + - + + + - + - - - - + + + +
+ Apache Tez, Apache, the Apache feather logo, and the Apache Tez project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.
+ ]]>
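Returning to the Timeline notes above before the build-file changes that follow: wiring a Tez AM to publish history to YARN Timeline, and hence to the Tez UI, comes down to selecting the ATS-backed history logging service. A hedged sketch; the config key and service class come from the `tez-yarn-timeline-history` plugin, while the surrounding method is purely illustrative.

```java
import org.apache.tez.dag.api.TezConfiguration;

public final class TimelineHistorySketch {
  private TimelineHistorySketch() {}

  // Point history logging at the ATS implementation; the cluster must also
  // run the Timeline Server with yarn.timeline-service.enabled=true in
  // yarn-site.xml, per the ATS documentation linked above.
  public static TezConfiguration withAtsHistory(TezConfiguration tezConf) {
    tezConf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
        "org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService");
    return tezConf;
  }
}
```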
diff --git a/hadoop-shim-impls/hadoop-shim-2.7/pom.xml b/hadoop-shim-impls/hadoop-shim-2.7/pom.xml index 027ab4b0ef..b6681f5192 100644 --- a/hadoop-shim-impls/hadoop-shim-2.7/pom.xml +++ b/hadoop-shim-impls/hadoop-shim-2.7/pom.xml @@ -19,7 +19,7 @@ hadoop-shim-impls org.apache.tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT 4.0.0 hadoop-shim-2.7 diff --git a/hadoop-shim-impls/hadoop-shim-2.8/pom.xml b/hadoop-shim-impls/hadoop-shim-2.8/pom.xml index cb9d63f063..027a5d4728 100644 --- a/hadoop-shim-impls/hadoop-shim-2.8/pom.xml +++ b/hadoop-shim-impls/hadoop-shim-2.8/pom.xml @@ -19,7 +19,7 @@ hadoop-shim-impls org.apache.tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT 4.0.0 hadoop-shim-2.8 diff --git a/hadoop-shim-impls/pom.xml b/hadoop-shim-impls/pom.xml index c645eedc80..68131fb558 100644 --- a/hadoop-shim-impls/pom.xml +++ b/hadoop-shim-impls/pom.xml @@ -20,7 +20,7 @@ tez org.apache.tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT hadoop-shim-impls pom @@ -29,7 +29,7 @@ hadoop27 - true + false hadoop-shim-2.7 @@ -38,7 +38,7 @@ hadoop28 - false + true hadoop-shim-2.8 diff --git a/hadoop-shim/pom.xml b/hadoop-shim/pom.xml index 63c68ddcb9..a2c7609d02 100644 --- a/hadoop-shim/pom.xml +++ b/hadoop-shim/pom.xml @@ -20,7 +20,7 @@ tez org.apache.tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT hadoop-shim diff --git a/pom.xml b/pom.xml index 64acfceef4..494d732ff9 100644 --- a/pom.xml +++ b/pom.xml @@ -16,10 +16,15 @@ 4.0.0 + + org.apache + apache + 23 + org.apache.tez tez pom - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez @@ -35,31 +40,69 @@ + + 1.8 + 1.8 true - ${user.home}/clover.license - 2.7.0 - 6.1.26 - 3.6.2.Final - 0.13.0 - 1.8 - 1.7.10 - [${javac.version},) + 3.0.0-M4 + false + UTF-8 + ${maven.build.timestamp} + apache.snapshots.https Apache Development Snapshot Repository https://repository.apache.org/content/repositories/snapshots apache.staging.https Apache Release Distribution Repository https://repository.apache.org/service/local/staging/deploy/maven2 - false - 2.5.0 + + + 2.12.3 + 1.78 + 1.8 + 1.1 + 8.35 + 1.3.2 + 1.6.0 + 1.15 + 4.4 + 2.16.0 + 2.6 + ${user.home}/clover.license + 3.2.0 + 3.0.5 + 1.8.0 + 32.0.1-jre + 3.4.1 + 1.8 + 1.19.4 + 1.5.4 + 3.0.0 + 4.13.2 + 1.8 + 1.0.0 + 3.1.1 + 3.0.0 + 3.1.2 + 3.2.4 + 3.12.1 + 3.1.0 + 4.3.1 + 4.1.116.Final + 0.13.0 + 3.25.5 + 3.11.4 ${env.PROTOC_PATH} - UTF-8 - scm:git:https://git-wip-us.apache.org/repos/asf/tez.git - ${maven.build.timestamp} - 1.4 - 3.0.1 - 2.10.4 - 2.4.3 + 2.0.0 + 1.2.1 + scm:git:https://gitbox.apache.org/repos/asf/tez.git + 3.1.0 + 1.7.36 + 1.1.10.4 + ${project.build.directory}/tmp + 1.7.9 + + true ${scm.url} @@ -76,6 +119,10 @@ ${distMgmtSnapshotsName} ${distMgmtSnapshotsUrl} + + tez.apache.org + scp://tez.apache.org + @@ -83,6 +130,9 @@ ${distMgmtSnapshotsId} ${distMgmtSnapshotsName} ${distMgmtSnapshotsUrl} + + false + @@ -98,6 +148,9 @@ ${distMgmtSnapshotsName} ${distMgmtSnapshotsUrl} default + + false + @@ -187,6 +240,11 @@ tez-yarn-timeline-history ${project.version} + + org.apache.tez + tez-protobuf-history-plugin + ${project.version} + org.apache.tez tez-yarn-timeline-history @@ -206,12 +264,12 @@ io.dropwizard.metrics metrics-core - 3.1.0 + ${metrics-core.version} org.roaringbitmap RoaringBitmap - 0.4.9 + ${roaringbitmap.version} org.slf4j @@ -219,57 +277,46 @@ ${slf4j.version} - com.ning - async-http-client - 1.8.16 + org.asynchttpclient + async-http-client + ${asynchttpclient.version} + + + io.netty + * + + org.slf4j - slf4j-log4j12 + slf4j-reload4j ${slf4j.version} commons-io commons-io - 2.4 + ${commons-io.version} commons-lang commons-lang - 2.6 - - - 
org.mortbay.jetty - jetty - compile - ${jetty.version} + ${commons-lang.version} io.netty - netty + netty-all compile ${netty.version} - - org.mortbay.jetty - jetty-util - compile - ${jetty.version} - javax.servlet - servlet-api - 2.5 + javax.servlet-api + ${servlet-api.version} commons-codec commons-codec - 1.4 - - - commons-collections - commons-collections - 3.2.2 + ${commons-codec.version} org.apache.hadoop @@ -284,18 +331,6 @@ commons-logging commons-logging-api - - org.mortbay.jetty - jetty - - - org.mortbay.jetty - jetty-util - - - org.mortbay.jetty - servlet-api-2.5 - com.sun.jersey jersey-core @@ -344,12 +379,27 @@ commons-el commons-el + + + io.netty + * + + + org.xerial.snappy + snappy-java + org.apache.hadoop hadoop-auth ${hadoop.version} + + + io.netty + netty + + org.apache.hadoop @@ -366,18 +416,6 @@ commons-logging commons-logging-api - - org.mortbay.jetty - jetty - - - org.mortbay.jetty - jetty-util - - - org.mortbay.jetty - servlet-api-2.5 - com.sun.jersey jersey-core @@ -472,7 +510,7 @@ ${hadoop.version} - org.mortbay.jetty + org.eclipse.jetty jetty-util @@ -490,6 +528,10 @@ com.sun.jersey jersey-server + + org.eclipse.jetty + * + @@ -503,6 +545,12 @@ org.apache.hadoop hadoop-yarn-server-web-proxy ${hadoop.version} + + + org.eclipse.jetty + * + + org.apache.hadoop @@ -568,6 +616,10 @@ org.apache.hadoop hadoop-yarn-server-common + + io.netty + netty + @@ -584,6 +636,10 @@ org.apache.hadoop hadoop-mapreduce-client-shuffle + + io.netty + netty + @@ -609,6 +665,10 @@ org.apache.hadoop hadoop-mapreduce-client-common + + io.netty + netty + @@ -617,6 +677,12 @@ test test-jar ${hadoop.version} + + + io.netty + netty + + org.apache.hadoop @@ -632,8 +698,8 @@ avro - org.mortbay.jetty - jetty + org.eclipse.jetty + * com.sun.jersey @@ -655,38 +721,38 @@ tomcat jasper-runtime + + io.netty + netty + org.apache.hadoop hadoop-hdfs - test-jar ${hadoop.version} + test-jar + test org.mockito - mockito-all - 1.10.8 + mockito-core + ${mockito-core.version} org.apache.commons commons-collections4 - 4.1 - - - org.apache.commons - commons-math3 - 3.1.1 + ${commons-collections4.version} commons-cli commons-cli - 1.2 + ${commons-cli.version} junit junit - 4.11 + ${junit.version} test @@ -697,38 +763,49 @@ com.google.guava guava - 11.0.2 + ${guava.version} org.codehaus.jettison jettison - 1.3.4 + ${jettison.version} com.google.code.findbugs jsr305 - 3.0.0 + ${jsr305.version} com.sun.jersey jersey-client - 1.9 + ${jersey.version} com.sun.jersey jersey-json - 1.9 + ${jersey.version} + + + org.bouncycastle + bcprov-jdk18on + ${bouncycastle.version} + test org.bouncycastle - bcprov-jdk16 - 1.46 + bcpkix-jdk18on + ${bouncycastle.version} test org.fusesource.leveldbjni leveldbjni-all - 1.8 + ${leveldbjni-all.version} + + + org.xerial.snappy + snappy-java + ${snappy-java.version} @@ -736,6 +813,7 @@ hadoop-shim tez-api + tez-build-tools tez-common tez-runtime-library tez-runtime-internals @@ -755,24 +833,9 @@ - - org.apache.maven.plugins - maven-compiler-plugin - 3.1 - - ${javac.version} - ${javac.version} - - - - org.apache.maven.plugins - maven-enforce-plugin - 1.4 - org.apache.maven.plugins maven-jar-plugin - 2.4 @@ -800,17 +863,11 @@ - - org.apache.maven.plugins - maven-source-plugin - 2.2.1 - org.apache.maven.plugins maven-javadoc-plugin - ${javadoc-maven-plugin.version} - -Xdoclint:none + none @@ -818,11 +875,6 @@ hadoop-maven-plugins ${hadoop.version} - - org.apache.maven.plugins - maven-assembly-plugin - 2.4 - com.github.eirslett frontend-maven-plugin @@ -831,22 +883,11 @@ exec-maven-plugin 
org.codehaus.mojo - 1.3.2 - - - org.apache.maven.plugins - maven-war-plugin - 2.5 - - - org.apache.maven.plugins - maven-shade-plugin - ${shade-maven-plugin.version} + ${codehaus.mojo.version} org.apache.rat apache-rat-plugin - 0.10 CHANGES.txt @@ -873,10 +914,14 @@ + + com.github.os72 + protoc-jar-maven-plugin + ${protoc-jar-maven-plugin.version} + org.apache.maven.plugins maven-surefire-plugin - 2.14.1 1 false @@ -887,43 +932,52 @@ 4 + ${test.build.data} true ${hadoop.version} - - org.codehaus.mojo - findbugs-maven-plugin - ${findbugs-maven-plugin.version} - org.apache.maven.plugins - maven-resources-plugin - 2.6 + maven-checkstyle-plugin + ${maven-checkstyle-plugin.version} + + + org.apache.tez + tez-build-tools + ${project.version} + + + com.puppycrawl.tools + checkstyle + ${checkstyle.version} + + - UTF-8 + checkstyle/checkstyle.xml + checkstyle/suppressions.xml + true + false + xml + html + ${project.build.directory}/test/checkstyle-errors.xml - org.apache.maven.plugins - maven-site-plugin - 3.4 - - - org.apache.maven.plugins - maven-deploy-plugin - 2.8.1 + org.owasp + dependency-check-maven + ${dependency-check-maven.version} - org.apache.maven.plugins - maven-gpg-plugin - 1.4 + org.codehaus.mojo + findbugs-maven-plugin + ${findbugs-maven-plugin.version} org.codehaus.mojo build-helper-maven-plugin - 1.8 + ${build-helper-maven-plugin.version} validate @@ -937,7 +991,7 @@ org.codehaus.mojo buildnumber-maven-plugin - 1.1 + ${buildnumber-maven-plugin.version} validate @@ -954,7 +1008,7 @@ org.eclipse.m2e lifecycle-mapping - 1.0.0 + ${lifecycle-mapping.version} @@ -975,23 +1029,18 @@ - - org.apache.maven.plugins - maven-project-info-reports-plugin - 2.7 - - false - - - - org.apache.maven.plugins - maven-clean-plugin - 3.0.0 - - + ro.isdc.wro4j wro4j-maven-plugin - 1.7.9 + ${wro4j-maven-plugin.version} + + + + org.mockito + mockito-core + 2.18.0 + + @@ -1000,8 +1049,14 @@ org.apache.maven.plugins maven-enforcer-plugin - 1.4 - false + ${maven-enforcer-plugin.version} + + + de.skuzzle.enforcer + restrict-imports-enforcer-rule + ${restrict-imports.enforcer.version} + + clean @@ -1011,14 +1066,32 @@ - [3.0.2,) + [3.6.3,) - ${enforced.java.version} + [${maven.compiler.target},) + + banned-illegal-imports + process-sources + + enforce + + + + + true + Use Fasterxml Jackson 2 dependency in place of org.codehaus Jackson 1 + + org.codehaus.jackson.** + + + + + @@ -1052,6 +1125,16 @@ ${basedir}/findbugs-exclude.xml + + + org.owasp + dependency-check-maven + @@ -1202,7 +1285,6 @@ org.apache.maven.plugins maven-javadoc-plugin - ${javadoc-maven-plugin.version} true @@ -1230,8 +1312,6 @@ maven-compiler-plugin true - ${javac.version} - ${javac.version} 9999 @@ -1250,7 +1330,7 @@ jdk.tools jdk.tools - 1.8 + ${jdk.tools.version} system ${java.home}/../lib/tools.jar @@ -1263,6 +1343,12 @@ aws + + org.apache.hadoop + hadoop-cloud-storage + runtime + ${hadoop.version} + org.apache.hadoop hadoop-aws @@ -1277,22 +1363,34 @@ azure + + org.apache.hadoop + hadoop-cloud-storage + runtime + ${hadoop.version} + org.apache.hadoop hadoop-azure runtime ${hadoop.version} + + org.apache.hadoop + hadoop-azure-datalake + runtime + ${hadoop.version} + + org.apache.maven.plugins maven-javadoc-plugin - ${javadoc-maven-plugin.version} aggregate diff --git a/tez-api/pom.xml b/tez-api/pom.xml index 78dd415572..4e6c27e18f 100644 --- a/tez-api/pom.xml +++ b/tez-api/pom.xml @@ -20,10 +20,14 @@ org.apache.tez tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-api + + false + + org.slf4j @@ -107,9 +111,18 @@ org.mockito - mockito-all + 
mockito-core + test + + + org.bouncycastle + bcprov-jdk18on test + + org.xerial.snappy + snappy-java + @@ -141,30 +154,26 @@ apache-rat-plugin - org.apache.hadoop - hadoop-maven-plugins + com.github.os72 + protoc-jar-maven-plugin - compile-protoc generate-sources - protoc + run - ${protobuf.version} + com.google.protobuf:protoc:${protobuf.version} ${protoc.path} - - ${basedir}/src/main/proto - - - ${basedir}/src/main/proto - - DAGApiRecords.proto - DAGClientAMProtocol.proto - Events.proto - - - ${project.build.directory}/generated-sources/java + none + + ${basedir}/src/main/proto + + + + ${project.build.directory}/generated-sources/java + + @@ -207,7 +216,7 @@ hadoop27 - true + false @@ -219,12 +228,21 @@ hadoop28 + + true + org.apache.hadoop hadoop-hdfs-client ${hadoop.version} + + org.apache.hadoop + hadoop-hdfs + test + ${hadoop.version} + diff --git a/tez-api/src/main/java/org/apache/tez/client/AMConfiguration.java b/tez-api/src/main/java/org/apache/tez/client/AMConfiguration.java index 238b155010..1b61c45d05 100644 --- a/tez-api/src/main/java/org/apache/tez/client/AMConfiguration.java +++ b/tez-api/src/main/java/org/apache/tez/client/AMConfiguration.java @@ -23,7 +23,6 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.yarn.api.records.LocalResource; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.tez.dag.api.TezConfiguration; import com.google.common.collect.Maps; @@ -34,7 +33,6 @@ class AMConfiguration { private Map amLocalResources = Maps.newHashMap(); private TezConfiguration tezConf; private Credentials credentials; - private YarnConfiguration yarnConfig; private LocalResource binaryConfLRsrc; AMConfiguration(TezConfiguration tezConf, Map localResources, @@ -64,10 +62,6 @@ void setCredentials(Credentials credentials) { void setTezConfiguration(TezConfiguration tezConf) { this.tezConf = tezConf; } - - void setYarnConfiguration(YarnConfiguration yarnConf) { - this.yarnConfig = yarnConf; - } String getQueueName() { return this.tezConf.get(TezConfiguration.TEZ_QUEUE_NAME); @@ -81,10 +75,6 @@ TezConfiguration getTezConfiguration() { return tezConf; } - YarnConfiguration getYarnConfiguration() { - return yarnConfig; - } - Credentials getCredentials() { return credentials; } diff --git a/tez-api/src/main/java/org/apache/tez/client/CallerContext.java b/tez-api/src/main/java/org/apache/tez/client/CallerContext.java index 809cf47d8e..f6c67cfe9c 100644 --- a/tez-api/src/main/java/org/apache/tez/client/CallerContext.java +++ b/tez-api/src/main/java/org/apache/tez/client/CallerContext.java @@ -24,7 +24,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; @Public @Unstable diff --git a/tez-api/src/main/java/org/apache/tez/client/FrameworkClient.java b/tez-api/src/main/java/org/apache/tez/client/FrameworkClient.java index b3e084c27c..a1a0b8a5e8 100644 --- a/tez-api/src/main/java/org/apache/tez/client/FrameworkClient.java +++ b/tez-api/src/main/java/org/apache/tez/client/FrameworkClient.java @@ -21,21 +21,39 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationId; import 
org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.hadoop.yarn.client.api.YarnClientApplication; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.tez.common.RPCUtil; import org.apache.tez.common.ReflectionUtils; +import org.apache.tez.dag.api.DAG; +import org.apache.tez.dag.api.DAGSubmissionTimedOut; +import org.apache.tez.dag.api.DagTypeConverters; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.api.TezReflectionException; import org.apache.tez.dag.api.TezUncheckedException; +import org.apache.tez.dag.api.client.DAGClient; +import org.apache.tez.dag.api.client.DAGClientImpl; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetAMStatusRequestProto; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetAMStatusResponseProto; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.ShutdownSessionRequestProto; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.SubmitDAGRequestProto; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.SubmitDAGResponseProto; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.protobuf.ServiceException; @Private public abstract class FrameworkClient { + protected static final Logger LOG = LoggerFactory.getLogger(FrameworkClient.class); public static FrameworkClient createFrameworkClient(TezConfiguration tezConf) { @@ -58,10 +76,8 @@ public static FrameworkClient createFrameworkClient(TezConfiguration tezConf) { * * @param tezConf the {@link org.apache.tez.dag.api.TezConfiguration} instance being used by the * cluster - * @param yarnConf the {@link org.apache.hadoop.yarn.conf.YarnConfiguration} instance being used - * by the cluster */ - public abstract void init(TezConfiguration tezConf, YarnConfiguration yarnConf); + public abstract void init(TezConfiguration tezConf); public abstract void start(); @@ -78,6 +94,104 @@ public abstract ApplicationId submitApplication(ApplicationSubmissionContext app public abstract ApplicationReport getApplicationReport(ApplicationId appId) throws YarnException, IOException; + public abstract String getAmHost(); + public abstract int getAmPort(); + public abstract boolean isRunning() throws IOException; + public TezAppMasterStatus getAMStatus(Configuration conf, ApplicationId appId, + UserGroupInformation ugi) throws TezException, ServiceException, IOException { + DAGClientAMProtocolBlockingPB proxy = getProxy(conf, appId, ugi); + + if (proxy == null) { + return TezAppMasterStatus.INITIALIZING; + } + GetAMStatusResponseProto response = + proxy.getAMStatus(null, GetAMStatusRequestProto.newBuilder().build()); + return DagTypeConverters.convertTezAppMasterStatusFromProto(response.getStatus()); + } + + public DAGClient submitDag(DAG dag, SubmitDAGRequestProto request, String clientName, + ApplicationId sessionAppId, long clientTimeout, UserGroupInformation ugi, TezConfiguration tezConf) + throws IOException, TezException, DAGSubmissionTimedOut { + DAGClientAMProtocolBlockingPB proxy = null; + try { + proxy = waitForProxy(clientTimeout, tezConf, sessionAppId, ugi); + } catch (InterruptedException e) { + throw new IOException("Interrupted while trying to create a connection to 
the AM", e); + } + if (proxy == null) { + try { + LOG.warn("DAG submission to session timed out, stopping session"); + stop(); + } catch (Throwable t) { + LOG.info("Got an exception when trying to stop session", t); + } + throw new DAGSubmissionTimedOut("Could not submit DAG to Tez Session" + + ", timed out after " + clientTimeout + " seconds"); + } + + String dagId = null; + try { + SubmitDAGResponseProto response = proxy.submitDAG(null, request); + // the following check is only for testing since the final class + // SubmitDAGResponseProto cannot be mocked + if (response != null) { + dagId = response.getDagId(); + } + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + } + + LOG.info("Submitted dag to TezSession" + + ", sessionName=" + clientName + + ", applicationId=" + sessionAppId + + ", dagId=" + dagId + + ", dagName=" + dag.getName()); + return getDAGClient(sessionAppId, dagId, tezConf, ugi); + } + + protected DAGClientAMProtocolBlockingPB waitForProxy(long clientTimeout, Configuration conf, + ApplicationId sessionAppId, UserGroupInformation ugi) + throws IOException, TezException, InterruptedException { + long startTime = System.currentTimeMillis(); + long endTime = startTime + (clientTimeout * 1000); + DAGClientAMProtocolBlockingPB proxy = null; + while (true) { + proxy = TezClientUtils.getAMProxy(this, conf, sessionAppId, ugi); + if (proxy != null) { + break; + } + Thread.sleep(100L); + if (clientTimeout != -1 && System.currentTimeMillis() > endTime) { + break; + } + } + return proxy; + } + + /** + * Shuts down session and returns a boolean=true if a proxy was successfully created and through + * that proxy a shutdownSession was called. + */ + public boolean shutdownSession(Configuration conf, ApplicationId sessionAppId, + UserGroupInformation ugi) throws TezException, IOException, ServiceException { + DAGClientAMProtocolBlockingPB proxy = getProxy(conf, sessionAppId, ugi); + if (proxy != null) { + ShutdownSessionRequestProto request = ShutdownSessionRequestProto.newBuilder().build(); + proxy.shutdownSession(null, request); + return true; + } + return false; + } + + protected DAGClientAMProtocolBlockingPB getProxy(Configuration conf, ApplicationId sessionAppId, + UserGroupInformation ugi) throws TezException, IOException { + return TezClientUtils.getAMProxy(this, conf, sessionAppId, ugi); + } + + public DAGClient getDAGClient(ApplicationId appId, String dagId, TezConfiguration tezConf, + UserGroupInformation ugi) { + return new DAGClientImpl(appId, dagId, tezConf, this, ugi); + } } diff --git a/tez-api/src/main/java/org/apache/tez/client/TezClient.java b/tez-api/src/main/java/org/apache/tez/client/TezClient.java index 65ce0fb6ed..937346cacd 100644 --- a/tez-api/src/main/java/org/apache/tez/client/TezClient.java +++ b/tez-api/src/main/java/org/apache/tez/client/TezClient.java @@ -22,7 +22,6 @@ import java.net.InetAddress; import java.net.UnknownHostException; import java.text.NumberFormat; -import java.util.TimerTask; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.atomic.AtomicBoolean; @@ -30,15 +29,17 @@ import java.util.concurrent.TimeUnit; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import javax.annotation.Nullable; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import 
org.apache.hadoop.security.UserGroupInformation; import org.apache.tez.common.JavaOptsChecker; -import org.apache.tez.common.RPCUtil; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.counters.Limits; import org.apache.tez.dag.api.TezConfigurationConstants; @@ -56,7 +57,6 @@ import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.YarnApplicationState; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.util.Time; @@ -75,15 +75,11 @@ import org.apache.tez.dag.api.client.DAGClient; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetAMStatusRequestProto; -import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetAMStatusResponseProto; -import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.ShutdownSessionRequestProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.SubmitDAGRequestProto; -import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.SubmitDAGResponseProto; -import org.apache.tez.dag.api.client.DAGClientImpl; import org.apache.tez.dag.api.records.DAGProtos.DAGPlan; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Maps; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.protobuf.ServiceException; @@ -109,7 +105,11 @@ public class TezClient { private static final Logger LOG = LoggerFactory.getLogger(TezClient.class); - + + private static final String appIdStrPrefix = "application"; + private static final String APPLICATION_ID_PREFIX = appIdStrPrefix + '_'; + private static final long PREWARM_WAIT_MS = 500; + @VisibleForTesting static final String NO_CLUSTER_DIAGNOSTICS_MSG = "No cluster diagnostics found."; @@ -119,7 +119,8 @@ public class TezClient { private ApplicationId lastSubmittedAppId; @VisibleForTesting final AMConfiguration amConfig; - private FrameworkClient frameworkClient; + @VisibleForTesting + FrameworkClient frameworkClient; private String diagnostics; @VisibleForTesting final boolean isSession; @@ -139,7 +140,7 @@ public class TezClient { @VisibleForTesting final ServicePluginsDescriptor servicePluginsDescriptor; private JavaOptsChecker javaOptsChecker = null; - + private DAGClient prewarmDagClient = null; private int preWarmDAGCounter = 0; /* max submitDAG request size through IPC; beyond this we transfer them in the same way we transfer local resource */ @@ -150,9 +151,11 @@ public class TezClient { private ScheduledExecutorService amKeepAliveService; + private final Map ugiMap; + private TezClient(String name, TezConfiguration tezConf) { this(name, tezConf, tezConf.getBoolean( - TezConfiguration.TEZ_AM_SESSION_MODE, TezConfiguration.TEZ_AM_SESSION_MODE_DEFAULT)); + TezConfiguration.TEZ_AM_SESSION_MODE, TezConfiguration.TEZ_AM_SESSION_MODE_DEFAULT)); } @Private @@ -193,6 +196,7 @@ protected TezClient(String name, TezConfiguration tezConf, boolean isSession, LOG.warn("The host name of the client the tez application was submitted from was unable to be retrieved", e); } + this.ugiMap = new HashMap<>(); this.amConfig = new AMConfiguration(tezConf, localResources, credentials); this.apiVersionInfo = new 
TezApiVersionInfo(); this.servicePluginsDescriptor = servicePluginsDescriptor; @@ -299,7 +303,7 @@ public static TezClient create(String name, TezConfiguration tezConf, boolean is } /** - * Add local files for the DAG App Master. These may be files, archives, + * Add local files for the DAG App Master. These may be files, archives, * jars etc.
*
* In non-session mode these will be added to the files of the App Master @@ -317,17 +321,17 @@ public static TezClient create(String name, TezConfiguration tezConf, boolean is * accumulate across DAG submissions and are never removed from the classpath. * Only LocalResourceType.FILE is supported. All files will be treated as * private. - * + * * @param localFiles the files to be made available in the AM */ public synchronized void addAppMasterLocalFiles(Map localFiles) { - Preconditions.checkNotNull(localFiles); + Objects.requireNonNull(localFiles); if (isSession && sessionStarted.get()) { additionalLocalResources.putAll(localFiles); } amConfig.addAMLocalResources(localFiles); } - + /** * If the next DAG App Master needs different local files, then use this * method to clear the local files and then add the new local files @@ -337,7 +341,7 @@ public synchronized void addAppMasterLocalFiles(Map local public synchronized void clearAppMasterLocalFiles() { amConfig.clearAMLocalResources(); } - + /** * Set security credentials to be used inside the app master, if needed. Tez App * Master needs credentials to access the staging directory and for most HDFS @@ -347,7 +351,7 @@ public synchronized void clearAppMasterLocalFiles() { * credentials must be supplied by the user. These will be used by the App * Master for the next DAG.
In session mode, credentials, if needed, must be * set before calling start() - * + * * @param credentials credentials */ public synchronized void setAppMasterCredentials(Credentials credentials) { @@ -376,100 +380,184 @@ public synchronized void setHistoryLogLevel(HistoryLogLevel historyLogLevel) { * @throws IOException */ public synchronized void start() throws TezException, IOException { - amConfig.setYarnConfiguration(new YarnConfiguration(amConfig.getTezConfiguration())); - - frameworkClient = createFrameworkClient(); - frameworkClient.init(amConfig.getTezConfiguration(), amConfig.getYarnConfiguration()); - frameworkClient.start(); - - if (this.amConfig.getTezConfiguration().getBoolean( - TezConfiguration.TEZ_CLIENT_JAVA_OPTS_CHECKER_ENABLED, - TezConfiguration.TEZ_CLIENT_JAVA_OPTS_CHECKER_ENABLED_DEFAULT)) { - String javaOptsCheckerClassName = this.amConfig.getTezConfiguration().get( - TezConfiguration.TEZ_CLIENT_JAVA_OPTS_CHECKER_CLASS, ""); - if (!javaOptsCheckerClassName.isEmpty()) { - try { - javaOptsChecker = ReflectionUtils.createClazzInstance(javaOptsCheckerClassName); - } catch (Exception e) { - LOG.warn("Failed to initialize configured Java Opts Checker" - + " (" + TezConfiguration.TEZ_CLIENT_JAVA_OPTS_CHECKER_CLASS - + ") , checkerClass=" + javaOptsCheckerClassName - + ". Disabling checker.", e); - javaOptsChecker = null; - } - } else { - javaOptsChecker = new JavaOptsChecker(); - } - - } - + startFrameworkClient(); + setupJavaOptsChecker(); if (isSession) { LOG.info("Session mode. Starting session."); TezClientUtils.processTezLocalCredentialsFile(sessionCredentials, amConfig.getTezConfiguration()); - - Map tezJarResources = getTezJarResources(sessionCredentials); - + clientTimeout = amConfig.getTezConfiguration().getInt( TezConfiguration.TEZ_SESSION_CLIENT_TIMEOUT_SECS, TezConfiguration.TEZ_SESSION_CLIENT_TIMEOUT_SECS_DEFAULT); - + try { if (sessionAppId == null) { sessionAppId = createApplication(); } - - // Add session token for shuffle - TezClientUtils.createSessionToken(sessionAppId.toString(), - jobTokenSecretManager, sessionCredentials); - - ApplicationSubmissionContext appContext = - TezClientUtils.createApplicationSubmissionContext( - sessionAppId, - null, clientName, amConfig, - tezJarResources, sessionCredentials, usingTezArchiveDeploy, apiVersionInfo, - servicePluginsDescriptor, javaOptsChecker); - - // Set Tez Sessions to not retry on AM crashes if recovery is disabled - if (!amConfig.getTezConfiguration().getBoolean( - TezConfiguration.DAG_RECOVERY_ENABLED, - TezConfiguration.DAG_RECOVERY_ENABLED_DEFAULT)) { - appContext.setMaxAppAttempts(1); - } + + ApplicationSubmissionContext appContext = setupApplicationContext(); frameworkClient.submitApplication(appContext); ApplicationReport appReport = frameworkClient.getApplicationReport(sessionAppId); LOG.info("The url to track the Tez Session: " + appReport.getTrackingUrl()); sessionStarted.set(true); } catch (YarnException e) { + cleanStagingDir(); + throw new TezException(e); + } catch (IOException e) { + cleanStagingDir(); throw new TezException(e); } - long amClientKeepAliveTimeoutIntervalMillis = - TezCommonUtils.getAMClientHeartBeatTimeoutMillis(amConfig.getTezConfiguration()); - // Poll at minimum of 1 second interval - long pollPeriod = TezCommonUtils. 
- getAMClientHeartBeatPollIntervalMillis(amConfig.getTezConfiguration(), - amClientKeepAliveTimeoutIntervalMillis, 10); - - boolean isLocal = amConfig.getTezConfiguration().getBoolean( - TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT); - if (!isLocal && amClientKeepAliveTimeoutIntervalMillis > 0) { - amKeepAliveService = Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setDaemon(true).setNameFormat("AMKeepAliveThread #%d").build()); - amKeepAliveService.scheduleWithFixedDelay(new Runnable() { - - private DAGClientAMProtocolBlockingPB proxy; - - @Override - public void run() { - proxy = sendAMHeartbeat(proxy); - } - }, pollPeriod, pollPeriod, TimeUnit.MILLISECONDS); + startClientHeartbeat(); + this.stagingFs = FileSystem.get(amConfig.getTezConfiguration()); + } + } + + private void cleanStagingDir() { + Configuration conf = amConfig.getTezConfiguration(); + String appId = sessionAppId.toString(); + Path stagingDir = TezCommonUtils.getTezSystemStagingPath(conf, appId); + boolean isStgDeleted = false; + try { + FileSystem fs = stagingDir.getFileSystem(conf); + isStgDeleted = fs.delete(stagingDir, true); + } catch (IOException ioe) { + LOG.error("Error deleting staging dir " + stagingDir, ioe); + } finally { + LOG.info("Staging dir {}, deleted:{} ", stagingDir, isStgDeleted); + } + } + + public synchronized TezClient getClient(String appIdStr) throws IOException, TezException { + return getClient(appIdfromString(appIdStr)); + } + + /** + * Alternative to start() that explicitly sets sessionAppId and doesn't start a new AM. + * The caller of getClient is responsible for initializing the new TezClient with a + * Configuration compatible with the existing AM. It is expected the caller has cached the + * original Configuration (e.g. in Zookeeper). + * + * In contrast to "start", no resources are localized. It is the responsibility of the caller to + * ensure that existing localized resources and staging dirs are still valid. + * + * @param appId + * @return 'this' just as a convenience for fluent style chaining + */ + public synchronized TezClient getClient(ApplicationId appId) throws TezException, IOException { + sessionAppId = appId; + startFrameworkClient(); + setupJavaOptsChecker(); + + if (!isSession) { + String msg = "Must be in session mode to bind TezClient to existing AM"; + LOG.error(msg); + throw new IllegalStateException(msg); + } + + LOG.info("Session mode. 
Reconnecting to session: " + sessionAppId.toString()); + + clientTimeout = amConfig.getTezConfiguration().getInt( + TezConfiguration.TEZ_SESSION_CLIENT_TIMEOUT_SECS, + TezConfiguration.TEZ_SESSION_CLIENT_TIMEOUT_SECS_DEFAULT); + + try { + setupApplicationContext(); + ApplicationReport appReport = frameworkClient.getApplicationReport(sessionAppId); + LOG.info("The url to track the Tez Session: " + appReport.getTrackingUrl()); + sessionStarted.set(true); + } catch (YarnException e) { + cleanStagingDir(); + throw new TezException(e); + } catch (IOException e) { + cleanStagingDir(); + throw new TezException(e); + } + + startClientHeartbeat(); + this.stagingFs = FileSystem.get(amConfig.getTezConfiguration()); + return this; + } + + private void startFrameworkClient() { + frameworkClient = createFrameworkClient(); + frameworkClient.init(amConfig.getTezConfiguration()); + frameworkClient.start(); + } + + private ApplicationSubmissionContext setupApplicationContext() throws IOException, YarnException { + TezClientUtils.processTezLocalCredentialsFile(sessionCredentials, + amConfig.getTezConfiguration()); + + Map tezJarResources = getTezJarResources(sessionCredentials); + // Add session token for shuffle + TezClientUtils.createSessionToken(sessionAppId.toString(), + jobTokenSecretManager, sessionCredentials); + + ApplicationSubmissionContext appContext = + TezClientUtils.createApplicationSubmissionContext( + sessionAppId, + null, clientName, amConfig, + tezJarResources, sessionCredentials, usingTezArchiveDeploy, apiVersionInfo, + servicePluginsDescriptor, javaOptsChecker); + + // Set Tez Sessions to not retry on AM crashes if recovery is disabled + if (!amConfig.getTezConfiguration().getBoolean( + TezConfiguration.DAG_RECOVERY_ENABLED, + TezConfiguration.DAG_RECOVERY_ENABLED_DEFAULT)) { + appContext.setMaxAppAttempts(1); + } + return appContext; + } + + private void setupJavaOptsChecker() { + if (this.amConfig.getTezConfiguration().getBoolean( + TezConfiguration.TEZ_CLIENT_JAVA_OPTS_CHECKER_ENABLED, + TezConfiguration.TEZ_CLIENT_JAVA_OPTS_CHECKER_ENABLED_DEFAULT)) { + String javaOptsCheckerClassName = this.amConfig.getTezConfiguration().get( + TezConfiguration.TEZ_CLIENT_JAVA_OPTS_CHECKER_CLASS, ""); + if (!javaOptsCheckerClassName.isEmpty()) { + try { + javaOptsChecker = ReflectionUtils.createClazzInstance(javaOptsCheckerClassName); + } catch (Exception e) { + LOG.warn("Failed to initialize configured Java Opts Checker" + + " (" + TezConfiguration.TEZ_CLIENT_JAVA_OPTS_CHECKER_CLASS + + ") , checkerClass=" + javaOptsCheckerClassName + + ". Disabling checker.", e); + javaOptsChecker = null; + } + } else { + javaOptsChecker = new JavaOptsChecker(); } - this.stagingFs = FileSystem.get(amConfig.getTezConfiguration()); + } + } + + private void startClientHeartbeat() { + long amClientKeepAliveTimeoutIntervalMillis = + TezCommonUtils.getAMClientHeartBeatTimeoutMillis(amConfig.getTezConfiguration()); + // Poll at minimum of 1 second interval + long pollPeriod = TezCommonUtils. 
+ getAMClientHeartBeatPollIntervalMillis(amConfig.getTezConfiguration(), + amClientKeepAliveTimeoutIntervalMillis, 10); + + boolean isLocal = amConfig.getTezConfiguration().getBoolean( + TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT); + if (!isLocal && amClientKeepAliveTimeoutIntervalMillis > 0) { + amKeepAliveService = Executors.newSingleThreadScheduledExecutor( + new ThreadFactoryBuilder() + .setDaemon(true).setNameFormat("AMKeepAliveThread #%d").build()); + amKeepAliveService.scheduleWithFixedDelay(new Runnable() { + + private DAGClientAMProtocolBlockingPB proxy; + + @Override + public void run() { + proxy = sendAMHeartbeat(proxy); + } + }, pollPeriod, pollPeriod, TimeUnit.MILLISECONDS); } } @@ -481,7 +569,8 @@ public DAGClientAMProtocolBlockingPB sendAMHeartbeat(DAGClientAMProtocolBlocking try { if (proxy == null) { try { - proxy = waitForProxy(); + proxy = frameworkClient.waitForProxy(clientTimeout, amConfig.getTezConfiguration(), + sessionAppId, getUgi()); } catch (InterruptedException e) { LOG.debug("Interrupted while trying to create a connection to the AM", e); } catch (SessionNotRunning e) { @@ -508,7 +597,7 @@ public DAGClientAMProtocolBlockingPB sendAMHeartbeat(DAGClientAMProtocolBlocking * cluster.
In session mode, it submits the DAG to the session App Master. It * blocks until either the DAG is submitted to the session or configured * timeout period expires. Cleans up session if the submission timed out. - * + * * @param dag * DAG to be submitted to Session * @return DAGClient to monitor the DAG @@ -516,23 +605,51 @@ public DAGClientAMProtocolBlockingPB sendAMHeartbeat(DAGClientAMProtocolBlocking * @throws IOException * @throws DAGSubmissionTimedOut * if submission timed out - */ + */ public synchronized DAGClient submitDAG(DAG dag) throws TezException, IOException { - if (isSession) { - return submitDAGSession(dag); - } else { - return submitDAGApplication(dag); + DAGClient result = isSession ? submitDAGSession(dag) : submitDAGApplication(dag); + if (result != null) { + closePrewarmDagClient(); // Assume the current DAG replaced the prewarm one; no need to kill. } + return result; + } + + private void killAndClosePrewarmDagClient(long waitTimeMs) { + if (prewarmDagClient == null) { + return; + } + try { + prewarmDagClient.tryKillDAG(); + if (waitTimeMs > 0) { + LOG.info("Waiting for prewarm DAG to shut down"); + prewarmDagClient.waitForCompletion(waitTimeMs); + } + } + catch (Exception ex) { + LOG.warn("Failed to shut down the prewarm DAG " + prewarmDagClient, ex); + } + closePrewarmDagClient(); + } + + private void closePrewarmDagClient() { + if (prewarmDagClient == null) { + return; + } + try { + prewarmDagClient.close(); + } catch (Exception e) { + LOG.warn("Failed to close prewarm DagClient " + prewarmDagClient, e); + } + prewarmDagClient = null; } private DAGClient submitDAGSession(DAG dag) throws TezException, IOException { - Preconditions.checkState(isSession == true, - "submitDAG with additional resources applies to only session mode. " + + Preconditions.checkState(isSession, + "submitDAG with additional resources applies to only session mode. 
" + "In non-session mode please specify all resources in the initial configuration"); - + verifySessionStateForSubmission(); - String dagId = null; String callerContextStr = ""; if (dag.getCallerContext() != null) { callerContextStr = ", callerContext=" + dag.getCallerContext().contextAsSimpleString(); @@ -542,7 +659,7 @@ private DAGClient submitDAGSession(DAG dag) throws TezException, IOException { + ", applicationId=" + sessionAppId + ", dagName=" + dag.getName() + callerContextStr); - + if (!additionalLocalResources.isEmpty()) { for (LocalResource lr : additionalLocalResources.values()) { Preconditions.checkArgument(lr.getType() == LocalResourceType.FILE, "LocalResourceType: " @@ -560,7 +677,7 @@ private DAGClient submitDAGSession(DAG dag) throws TezException, IOException { requestBuilder.setAdditionalAmResources(DagTypeConverters .convertFromLocalResources(additionalLocalResources)); } - + additionalLocalResources.clear(); // if request size exceeds maxSubmitDAGRequestSizeThroughIPC, we serialize them to HDFS @@ -570,54 +687,31 @@ private DAGClient submitDAGSession(DAG dag) throws TezException, IOException { sessionAppId.toString()), TezConstants.TEZ_PB_PLAN_BINARY_NAME + serializedSubmitDAGPlanRequestCounter.incrementAndGet()); - try (FSDataOutputStream fsDataOutputStream = stagingFs.create(dagPlanPath, false)) { + FileSystem fs = dagPlanPath.getFileSystem(stagingFs.getConf()); + try (FSDataOutputStream fsDataOutputStream = fs.create(dagPlanPath, false)) { LOG.info("Send dag plan using YARN local resources since it's too large" + ", dag plan size=" + request.getSerializedSize() + ", max dag plan size through IPC=" + maxSubmitDAGRequestSizeThroughIPC + ", max IPC message size= " + amConfig.getTezConfiguration().getInt( CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH, CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH_DEFAULT)); request.writeTo(fsDataOutputStream); - request = requestBuilder.clear().setSerializedRequestPath(stagingFs.resolvePath(dagPlanPath).toString()).build(); } + request = requestBuilder.clear().setSerializedRequestPath(fs.resolvePath(dagPlanPath).toString()).build(); } - DAGClientAMProtocolBlockingPB proxy = null; - try { - proxy = waitForProxy(); - } catch (InterruptedException e) { - throw new IOException("Interrupted while trying to create a connection to the AM", e); - } - if (proxy == null) { - try { - LOG.warn("DAG submission to session timed out, stopping session"); - stop(); - } catch (Throwable t) { - LOG.info("Got an exception when trying to stop session", t); - } - throw new DAGSubmissionTimedOut("Could not submit DAG to Tez Session" - + ", timed out after " + clientTimeout + " seconds"); - } + return frameworkClient.submitDag(dag, request, clientName, sessionAppId, clientTimeout, + getUgi(), amConfig.getTezConfiguration()); + } - try { - SubmitDAGResponseProto response = proxy.submitDAG(null, request); - // the following check is only for testing since the final class - // SubmitDAGResponseProto cannot be mocked - if (response != null) { - dagId = response.getDagId(); - } - } catch (ServiceException e) { - RPCUtil.unwrapAndThrowException(e); - } + private UserGroupInformation getUgi() throws IOException { + String userName = UserGroupInformation.getCurrentUser().getUserName(); + return ugiMap.computeIfAbsent(userName, + v -> UserGroupInformation.createRemoteUser(userName)); + } - LOG.info("Submitted dag to TezSession" - + ", sessionName=" + clientName - + ", applicationId=" + sessionAppId - + ", dagId=" + dagId - + ", dagName=" + dag.getName()); - return 
new DAGClientImpl(sessionAppId, dagId, - amConfig.getTezConfiguration(), - amConfig.getYarnConfiguration(), - frameworkClient); + @VisibleForTesting + protected long getPrewarmWaitTimeMs() { + return PREWARM_WAIT_MS; } /** @@ -627,6 +721,7 @@ private DAGClient submitDAGSession(DAG dag) throws TezException, IOException { * @throws IOException */ public synchronized void stop() throws TezException, IOException { + killAndClosePrewarmDagClient(getPrewarmWaitTimeMs()); try { if (amKeepAliveService != null) { amKeepAliveService.shutdownNow(); @@ -638,39 +733,34 @@ public synchronized void stop() throws TezException, IOException { sessionStopped.set(true); boolean sessionShutdownSuccessful = false; try { - DAGClientAMProtocolBlockingPB proxy = getAMProxy(sessionAppId); - if (proxy != null) { - ShutdownSessionRequestProto request = - ShutdownSessionRequestProto.newBuilder().build(); - proxy.shutdownSession(null, request); - sessionShutdownSuccessful = true; - boolean asynchronousStop = amConfig.getTezConfiguration().getBoolean( - TezConfiguration.TEZ_CLIENT_ASYNCHRONOUS_STOP, - TezConfiguration.TEZ_CLIENT_ASYNCHRONOUS_STOP_DEFAULT); - if (!asynchronousStop) { - LOG.info("Waiting until application is in a final state"); - long currentTimeMillis = System.currentTimeMillis(); - long timeKillIssued = currentTimeMillis; - long killTimeOut = amConfig.getTezConfiguration().getLong( - TezConfiguration.TEZ_CLIENT_HARD_KILL_TIMEOUT_MS, - TezConfiguration.TEZ_CLIENT_HARD_KILL_TIMEOUT_MS_DEFAULT); - ApplicationReport appReport = frameworkClient - .getApplicationReport(sessionAppId); - while ((currentTimeMillis < timeKillIssued + killTimeOut) - && !isJobInTerminalState(appReport.getYarnApplicationState())) { - try { - Thread.sleep(1000L); - } catch (InterruptedException ie) { - /** interrupted, just break */ - break; - } - currentTimeMillis = System.currentTimeMillis(); - appReport = frameworkClient.getApplicationReport(sessionAppId); + sessionShutdownSuccessful = frameworkClient + .shutdownSession(amConfig.getTezConfiguration(), sessionAppId, getUgi()); + boolean asynchronousStop = amConfig.getTezConfiguration().getBoolean( + TezConfiguration.TEZ_CLIENT_ASYNCHRONOUS_STOP, + TezConfiguration.TEZ_CLIENT_ASYNCHRONOUS_STOP_DEFAULT); + if (!asynchronousStop && sessionShutdownSuccessful) { + LOG.info("Waiting until application is in a final state"); + long currentTimeMillis = System.currentTimeMillis(); + long timeKillIssued = currentTimeMillis; + long killTimeOut = amConfig.getTezConfiguration().getLong( + TezConfiguration.TEZ_CLIENT_HARD_KILL_TIMEOUT_MS, + TezConfiguration.TEZ_CLIENT_HARD_KILL_TIMEOUT_MS_DEFAULT); + ApplicationReport appReport = frameworkClient + .getApplicationReport(sessionAppId); + while ((currentTimeMillis < timeKillIssued + killTimeOut) + && !isJobInTerminalState(appReport.getYarnApplicationState())) { + try { + Thread.sleep(1000L); + } catch (InterruptedException ie) { + /** interrupted, just break */ + break; } + currentTimeMillis = System.currentTimeMillis(); + appReport = frameworkClient.getApplicationReport(sessionAppId); + } - if (!isJobInTerminalState(appReport.getYarnApplicationState())) { - frameworkClient.killApplication(sessionAppId); - } + if (!isJobInTerminalState(appReport.getYarnApplicationState())) { + frameworkClient.killApplication(sessionAppId); } } } catch (TezException e) { @@ -714,7 +804,7 @@ private boolean isJobInTerminalState(YarnApplicationState yarnApplicationState) public String getClientName() { return clientName; } - + @Private @VisibleForTesting public 
synchronized ApplicationId getAppMasterApplicationId() {
@@ -727,16 +817,16 @@ public synchronized ApplicationId getAppMasterApplicationId() {
 
   /**
    * Get the status of the App Master executing the DAG
-   * In non-session mode it returns the status of the last submitted DAG App Master 
+   * In non-session mode it returns the status of the last submitted DAG App Master
    * In session mode, it returns the status of the App Master hosting the session
-   * 
+   *
    * @return State of the session
    * @throws TezException
    * @throws IOException
    */
   public synchronized TezAppMasterStatus getAppMasterStatus() throws TezException, IOException {
-    // Supporting per-DAG app master case since user may choose to run the same 
-    // code in that mode and the code should continue to work. Its easy to provide 
+    // Supporting per-DAG app master case since user may choose to run the same
+    // code in that mode and the code should continue to work. It's easy to provide
     // the correct view for per-DAG app master too.
     ApplicationId appId = null;
     if (isSession) {
@@ -765,14 +855,7 @@ public synchronized TezAppMasterStatus getAppMasterStatus() throws TezException,
         return TezAppMasterStatus.SHUTDOWN;
       case RUNNING:
         try {
-          DAGClientAMProtocolBlockingPB proxy = getAMProxy(appId);
-          if (proxy == null) {
-            return TezAppMasterStatus.INITIALIZING;
-          }
-          GetAMStatusResponseProto response = proxy.getAMStatus(null,
-              GetAMStatusRequestProto.newBuilder().build());
-          return DagTypeConverters.convertTezAppMasterStatusFromProto(
-              response.getStatus());
+          return frameworkClient.getAMStatus(amConfig.getTezConfiguration(), appId, getUgi());
         } catch (TezException e) {
           LOG.info("Failed to retrieve AM Status via proxy", e);
         } catch (ServiceException e) {
@@ -786,21 +869,21 @@ public synchronized TezAppMasterStatus getAppMasterStatus() throws TezException,
     }
     return TezAppMasterStatus.INITIALIZING;
   }
-  
+
   /**
    * API to help pre-allocate containers in session mode. In non-session mode
-   * this is ignored. The pre-allocated containers may be re-used by subsequent 
-   * job DAGs to improve performance. 
+   * this is ignored. The pre-allocated containers may be re-used by subsequent
+   * job DAGs to improve performance.
    * The preWarm vertex should be configured and setup exactly
-   * like the other vertices in the job DAGs so that the pre-allocated containers 
+   * like the other vertices in the job DAGs so that the pre-allocated containers
    * may be re-used by the subsequent DAGs to improve performance.
    * The processor for the preWarmVertex may be used to pre-warm the containers
-   * by pre-loading classes etc. It should be short-running so that pre-warming 
+   * by pre-loading classes etc. It should be short-running so that pre-warming
    * does not block real execution. Users can specify their custom processors or
    * use the PreWarmProcessor from the runtime library.
    * The parallelism of the preWarmVertex will determine the number of preWarmed
    * containers.
-   * Pre-warming is best efforts and among other factors is limited by the free 
+   * Pre-warming is best efforts and among other factors is limited by the free
    * resources on the cluster.
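A minimal sketch of the pre-warm flow the javadoc above describes (illustrative only, not part of the patch; vertex name, parallelism, and resource size are assumptions):

```java
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.PreWarmVertex;

final class PreWarmSketch {
  static void warmUp(TezClient session) throws Exception {
    // Parallelism controls how many containers are pre-allocated; the resource
    // spec should match the real job vertices so the containers can be reused.
    PreWarmVertex prewarm =
        PreWarmVertex.create("prewarm", 10, Resource.newInstance(1024, 1));
    session.preWarm(prewarm); // best effort: limited by free cluster capacity
  }
}
```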
* @param preWarmVertex * @throws TezException @@ -846,7 +929,7 @@ public synchronized void preWarm(PreWarmVertex preWarmVertex, } verifySessionStateForSubmission(); - + DAG dag = org.apache.tez.dag.api.DAG.create(TezConstants.TEZ_PREWARM_DAG_NAME_PREFIX + "_" + preWarmDAGCounter++); dag.addVertex(preWarmVertex); @@ -859,13 +942,13 @@ public synchronized void preWarm(PreWarmVertex preWarmVertex, "available", e); } if(isReady) { - submitDAG(dag); + prewarmDagClient = submitDAG(dag); } else { throw new SessionNotReady("Tez AM not ready, could not submit DAG"); } } - + /** * Wait till the DAG is ready to be submitted. * In non-session mode this is a no-op since the application can be immediately @@ -873,7 +956,7 @@ public synchronized void preWarm(PreWarmVertex preWarmVertex, * In session mode, this waits for the session host to be ready to accept a DAG * @throws IOException * @throws TezException - * @throws InterruptedException + * @throws InterruptedException */ @Evolving public synchronized void waitTillReady() throws IOException, TezException, InterruptedException { @@ -951,32 +1034,6 @@ protected FrameworkClient createFrameworkClient() { return FrameworkClient.createFrameworkClient(amConfig.getTezConfiguration()); } - @VisibleForTesting - // for testing - protected DAGClientAMProtocolBlockingPB getAMProxy(ApplicationId appId) - throws TezException, IOException { - return TezClientUtils.getAMProxy( - frameworkClient, amConfig.getYarnConfiguration(), appId); - } - - private DAGClientAMProtocolBlockingPB waitForProxy() - throws IOException, TezException, InterruptedException { - long startTime = System.currentTimeMillis(); - long endTime = startTime + (clientTimeout * 1000); - DAGClientAMProtocolBlockingPB proxy = null; - while (true) { - proxy = getAMProxy(sessionAppId); - if (proxy != null) { - break; - } - Thread.sleep(100l); - if (clientTimeout != -1 && System.currentTimeMillis() > endTime) { - break; - } - } - return proxy; - } - private void verifySessionStateForSubmission() throws SessionNotRunning { Preconditions.checkState(isSession, "Invalid without session mode"); if (!sessionStarted.get()) { @@ -985,7 +1042,7 @@ private void verifySessionStateForSubmission() throws SessionNotRunning { throw new SessionNotRunning("Session stopped by user"); } } - + private DAGClient submitDAGApplication(DAG dag) throws TezException, IOException { ApplicationId appId = createApplication(); @@ -998,7 +1055,7 @@ DAGClient submitDAGApplication(ApplicationId appId, DAG dag) LOG.info("Submitting DAG application with id: " + appId); try { // Use the AMCredentials object in client mode, since this won't be re-used. - // Ensures we don't fetch credentially unnecessarily if the user has already provided them. + // Ensures we don't fetch credentials unnecessarily if the user has already provided them. 
Credentials credentials = amConfig.getCredentials(); if (credentials == null) { credentials = new Credentials(); @@ -1024,7 +1081,7 @@ DAGClient submitDAGApplication(ApplicationId appId, DAG dag) + ", applicationId=" + appId + ", dagName=" + dag.getName() + callerContextStr); - + TezCommonUtils.logCredentials(LOG, credentials, "appContext"); frameworkClient.submitApplication(appContext); ApplicationReport appReport = frameworkClient.getApplicationReport(appId); LOG.info("The url to track the Tez AM: " + appReport.getTrackingUrl()); @@ -1034,8 +1091,7 @@ DAGClient submitDAGApplication(ApplicationId appId, DAG dag) } // wait for dag in non-session mode to start running, so that we can start to getDAGStatus waitNonSessionTillReady(); - return getDAGClient(appId, amConfig.getTezConfiguration(), amConfig.getYarnConfiguration(), - frameworkClient); + return getDAGClient(appId, amConfig.getTezConfiguration(), frameworkClient, getUgi()); } private ApplicationId createApplication() throws TezException, IOException { @@ -1058,18 +1114,9 @@ private synchronized Map getTezJarResources(Credentials c } @Private - static DAGClient getDAGClient(ApplicationId appId, TezConfiguration tezConf, YarnConfiguration - yarnConf, FrameworkClient frameworkClient) - throws IOException, TezException { - return new DAGClientImpl(appId, getDefaultTezDAGID(appId), tezConf, - yarnConf, frameworkClient); - } - - @Private // Used only for MapReduce compatibility code static DAGClient getDAGClient(ApplicationId appId, TezConfiguration tezConf, - FrameworkClient frameworkClient) - throws IOException, TezException { - return getDAGClient(appId, tezConf, new YarnConfiguration(tezConf), frameworkClient); + FrameworkClient frameworkClient, UserGroupInformation ugi) throws IOException, TezException { + return frameworkClient.getDAGClient(appId, getDefaultTezDAGID(appId), tezConf, ugi); } // DO NOT CHANGE THIS. This code is replicated from TezDAGID.java @@ -1211,4 +1258,40 @@ public TezClient build() { servicePluginsDescriptor); } } + + //Copied this helper method from + //org.apache.hadoop.yarn.api.records.ApplicationId in Hadoop 2.8+ + //to simplify implementation on 2.7.x + @Public + @Unstable + public static ApplicationId appIdfromString(String appIdStr) { + if (!appIdStr.startsWith(APPLICATION_ID_PREFIX)) { + throw new IllegalArgumentException("Invalid ApplicationId prefix: " + + appIdStr + ". The valid ApplicationId should start with prefix " + + appIdStrPrefix); + } + try { + int pos1 = APPLICATION_ID_PREFIX.length() - 1; + int pos2 = appIdStr.indexOf('_', pos1 + 1); + if (pos2 < 0) { + throw new IllegalArgumentException("Invalid ApplicationId: " + + appIdStr); + } + long rmId = Long.parseLong(appIdStr.substring(pos1 + 1, pos2)); + int appId = Integer.parseInt(appIdStr.substring(pos2 + 1)); + ApplicationId applicationId = ApplicationId.newInstance(rmId, appId); + return applicationId; + } catch (NumberFormatException n) { + throw new IllegalArgumentException("Invalid ApplicationId: " + + appIdStr, n); + } + } + + public String getAmHost() { + return frameworkClient == null ? null : frameworkClient.getAmHost(); + } + + public int getAmPort() { + return frameworkClient == null ? 
-1 : frameworkClient.getAmPort();
+  }
 }
diff --git a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java
index f9316e5cf4..bae0174b3b 100644
--- a/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java
+++ b/tez-api/src/main/java/org/apache/tez/client/TezClientUtils.java
@@ -19,6 +19,7 @@
 package org.apache.tez.client;
 
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.InetSocketAddress;
@@ -27,7 +28,9 @@
 import java.nio.ByteBuffer;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -37,11 +40,13 @@
 import java.util.TreeMap;
 import java.util.Vector;
 import java.util.Map.Entry;
+import java.util.Objects;
 
 import com.google.common.base.Strings;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.lang.StringUtils;
-import org.apache.commons.math3.util.Precision;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.tez.common.JavaOptsChecker;
 import org.apache.tez.dag.api.records.DAGProtos.AMPluginDescriptorProto;
 import org.apache.tez.serviceplugins.api.ServicePluginsDescriptor;
@@ -110,44 +115,39 @@
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Function;
-import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
 
 @Private
-public class TezClientUtils {
+public final class TezClientUtils {
 
-  private static Logger LOG = LoggerFactory.getLogger(TezClientUtils.class);
+  private static final Logger LOG = LoggerFactory.getLogger(TezClientUtils.class);
   private static final int UTF8_CHUNK_SIZE = 16 * 1024;
 
-  private static FileStatus[] getLRFileStatus(String fileName, Configuration conf) throws
-      IOException {
-    URI uri;
+  private TezClientUtils() {}
+
+  private static RemoteIterator<LocatedFileStatus> getListFilesFileStatus(String configUri, Configuration conf)
+      throws IOException {
+    Path p = getPath(configUri);
+    FileSystem fs = p.getFileSystem(conf);
+    p = fs.resolvePath(p.makeQualified(fs.getUri(), fs.getWorkingDirectory()));
+    FileSystem targetFS = p.getFileSystem(conf);
+    return targetFS.listFiles(p, false);
+  }
+
+  private static Path getPath(String configUri) {
     try {
-      uri = new URI(fileName);
+      return new Path(new URI(configUri));
     } catch (URISyntaxException e) {
-      String message = "Invalid URI defined in configuration for"
-          + " location of TEZ jars. providedURI=" + fileName;
+      String message = "Invalid URI defined in configuration for" + " location of TEZ jars. providedURI=" + configUri;
       LOG.error(message);
       throw new TezUncheckedException(message, e);
     }
-
-    Path p = new Path(uri);
-    FileSystem fs = p.getFileSystem(conf);
-    p = fs.resolvePath(p.makeQualified(fs.getUri(),
-        fs.getWorkingDirectory()));
-    FileSystem targetFS = p.getFileSystem(conf);
-    if (targetFS.isDirectory(p)) {
-      return targetFS.listStatus(p);
-    } else {
-      FileStatus fStatus = targetFS.getFileStatus(p);
-      return new FileStatus[]{fStatus};
-    }
   }
 
   /**
    * Setup LocalResource map for Tez jars based on provided Configuration
-   * 
+   *
    * @param conf
    *          Configuration to use to access Tez jars' locations
    * @param credentials
@@ -160,11 +160,11 @@ private static FileStatus[] getLRFileStatus(String fileName, Configuration conf)
   static boolean setupTezJarsLocalResources(TezConfiguration conf,
       Credentials credentials, Map<String, LocalResource> tezJarResources)
      throws IOException {
-    Preconditions.checkNotNull(credentials, "A non-null credentials object should be specified");
+    Objects.requireNonNull(credentials, "A non-null credentials object should be specified");
     boolean usingTezArchive = false;
 
     if (conf.getBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, false)){
-      LOG.info("Ignoring '" + TezConfiguration.TEZ_LIB_URIS + "' since '" + 
+      LOG.info("Ignoring '" + TezConfiguration.TEZ_LIB_URIS + "' since '" +
           TezConfiguration.TEZ_IGNORE_LIB_URIS + "' is set to true");
     } else {
       // Add tez jars to local resource
@@ -230,15 +230,11 @@ private static boolean addLocalResources(Configuration conf,
       } else {
         type = LocalResourceType.FILE;
       }
+      RemoteIterator<LocatedFileStatus> fileStatuses = getListFilesFileStatus(configUri, conf);
 
-      FileStatus [] fileStatuses = getLRFileStatus(configUri, conf);
-
-      for (FileStatus fStatus : fileStatuses) {
+      while (fileStatuses.hasNext()) {
+        LocatedFileStatus fStatus = fileStatuses.next();
         String linkName;
-        if (fStatus.isDirectory()) {
-          // Skip directories - no recursive search support.
- continue; - } // If the resource is an archive, we've already done this work if(type != LocalResourceType.ARCHIVE) { u = fStatus.getPath().toUri(); @@ -247,8 +243,7 @@ private static boolean addLocalResources(Configuration conf, p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory())); if(null != u.getFragment()) { - LOG.warn("Fragment set for link being interpreted as a file," + - "URI: " + u.toString()); + LOG.warn("Fragment set for link being interpreted as a file, URI: {}", u); } } @@ -321,7 +316,7 @@ static void processTezLocalCredentialsFile(Credentials credentials, Configuratio /** * Verify or create the Staging area directory on the configured Filesystem * @param stagingArea Staging area directory path - * @return the FileSytem for the staging area directory + * @return the FileSystem for the staging area directory * @throws IOException */ public static FileSystem ensureStagingDirExists(Configuration conf, @@ -333,8 +328,13 @@ public static FileSystem ensureStagingDirExists(Configuration conf, UserGroupInformation ugi = UserGroupInformation.getLoginUser(); realUser = ugi.getShortUserName(); currentUser = UserGroupInformation.getCurrentUser().getShortUserName(); - if (fs.exists(stagingArea)) { - FileStatus fsStatus = fs.getFileStatus(stagingArea); + FileStatus fsStatus = null; + try { + fsStatus = fs.getFileStatus(stagingArea); + } catch (FileNotFoundException fnf) { + // Ignore + } + if (fsStatus != null) { String owner = fsStatus.getOwner(); if (!(owner.equals(currentUser) || owner.equals(realUser))) { throw new IOException("The ownership on the staging directory " @@ -354,7 +354,7 @@ public static FileSystem ensureStagingDirExists(Configuration conf, } return fs; } - + /** * Populate {@link Credentials} for the URI's to access them from their {@link FileSystem}s * @param uris URIs that need to be accessed @@ -382,7 +382,7 @@ public Path apply(URI input) { * Obtains tokens for the DAG based on the list of URIs setup in the DAG. The * fetched credentials are populated back into the DAG and can be retrieved * via dag.getCredentials - * + * * @param dag * the dag for which credentials need to be setup * @param sessionCredentials @@ -395,15 +395,14 @@ public Path apply(URI input) { static Credentials setupDAGCredentials(DAG dag, Credentials sessionCredentials, Configuration conf) throws IOException { - Preconditions.checkNotNull(sessionCredentials); + Objects.requireNonNull(sessionCredentials); TezCommonUtils.logCredentials(LOG, sessionCredentials, "session"); - Credentials dagCredentials = new Credentials(); // All session creds are required for the DAG. dagCredentials.mergeAll(sessionCredentials); - + // Add additional credentials based on any URIs that the user may have specified. - + // Obtain Credentials for any paths that the user may have configured. 
addFileSystemCredentialsFromURIs(dag.getURIsForCredentials(), dagCredentials, conf); @@ -423,7 +422,7 @@ static Credentials setupDAGCredentials(DAG dag, Credentials sessionCredentials, addFileSystemCredentialsFromURIs(dataSink.getURIsForCredentials(), dagCredentials, conf); } } - + for (LocalResource lr: dag.getTaskLocalFiles().values()) { lrPaths.add(ConverterUtils.getPathFromYarnURL(lr.getResource())); } @@ -434,7 +433,7 @@ static Credentials setupDAGCredentials(DAG dag, Credentials sessionCredentials, } catch (URISyntaxException e) { throw new IOException(e); } - + return dagCredentials; } @@ -461,7 +460,7 @@ public static ApplicationSubmissionContext createApplicationSubmissionContext( ServicePluginsDescriptor servicePluginsDescriptor, JavaOptsChecker javaOptsChecker) throws IOException, YarnException { - Preconditions.checkNotNull(sessionCreds); + Objects.requireNonNull(sessionCreds); TezConfiguration conf = amConfig.getTezConfiguration(); FileSystem fs = TezClientUtils.ensureStagingDirExists(conf, @@ -479,24 +478,13 @@ public static ApplicationSubmissionContext createApplicationSubmissionContext( capability.setVirtualCores( amConfig.getTezConfiguration().getInt(TezConfiguration.TEZ_AM_RESOURCE_CPU_VCORES, TezConfiguration.TEZ_AM_RESOURCE_CPU_VCORES_DEFAULT)); - if (LOG.isDebugEnabled()) { - LOG.debug("AppMaster capability = " + capability); - } + LOG.debug("AppMaster capability = {}", capability); // Setup required Credentials for the AM launch. DAG specific credentials // are handled separately. ByteBuffer securityTokens = null; - // Setup security tokens - Credentials amLaunchCredentials = new Credentials(); - if (amConfig.getCredentials() != null) { - amLaunchCredentials.addAll(amConfig.getCredentials()); - } - - // Add Staging dir creds to the list of session credentials. - TokenCache.obtainTokensForFileSystems(sessionCreds, new Path[]{binaryConfPath}, conf); - - // Add session specific credentials to the AM credentials. 
- amLaunchCredentials.mergeAll(sessionCreds); + Credentials amLaunchCredentials = + prepareAmLaunchCredentials(amConfig, sessionCreds, conf, binaryConfPath); DataOutputBuffer dob = new DataOutputBuffer(); amLaunchCredentials.writeTokenStorageToStream(dob); @@ -515,7 +503,7 @@ public static ApplicationSubmissionContext createApplicationSubmissionContext( String[] amLogParams = parseLogParams(amLogLevelString); String amLogLevel = amLogParams[0]; - maybeAddDefaultLoggingJavaOpts(amLogLevel, vargs); + TezClientUtils.addLog4jSystemProperties(amLogLevel, vargs); // FIX sun bug mentioned in TEZ-327 @@ -540,10 +528,7 @@ public static ApplicationSubmissionContext createApplicationSubmissionContext( } vargsFinal.add(mergedCommand.toString()); - if (LOG.isDebugEnabled()) { - LOG.debug("Command to launch container for ApplicationMaster is : " - + mergedCommand); - } + LOG.debug("Command to launch container for ApplicationMaster is : {}", mergedCommand); Map environment = new TreeMap(); TezYARNUtils.setupDefaultEnv(environment, conf, @@ -575,16 +560,19 @@ public static ApplicationSubmissionContext createApplicationSubmissionContext( } // emit conf as PB file - ConfigurationProto finalConfProto = createFinalConfProtoForApp(tezConf, - servicePluginsDescriptor); + // don't overwrite existing conf, needed for TezClient.getClient() so existing containers have stable resource fingerprints + if(!binaryConfPath.getFileSystem(tezConf).exists(binaryConfPath)) { + ConfigurationProto finalConfProto = createFinalConfProtoForApp(tezConf, + servicePluginsDescriptor); - FSDataOutputStream amConfPBOutBinaryStream = null; - try { - amConfPBOutBinaryStream = TezCommonUtils.createFileForAM(fs, binaryConfPath); - finalConfProto.writeTo(amConfPBOutBinaryStream); - } finally { - if(amConfPBOutBinaryStream != null){ - amConfPBOutBinaryStream.close(); + FSDataOutputStream amConfPBOutBinaryStream = null; + try { + amConfPBOutBinaryStream = TezCommonUtils.createFileForAM(fs, binaryConfPath); + finalConfProto.writeTo(amConfPBOutBinaryStream); + } finally { + if (amConfPBOutBinaryStream != null) { + amConfPBOutBinaryStream.close(); + } } } @@ -606,7 +594,7 @@ public static ApplicationSubmissionContext createApplicationSubmissionContext( if (amLocalResources != null && !amLocalResources.isEmpty()) { amResourceProto = DagTypeConverters.convertFromLocalResources(amLocalResources); } else { - amResourceProto = DAGProtos.PlanLocalResourcesProto.getDefaultInstance(); + amResourceProto = DAGProtos.PlanLocalResourcesProto.getDefaultInstance(); } amResourceProto.writeDelimitedTo(sessionJarsPBOutStream); } finally { @@ -628,7 +616,7 @@ public static ApplicationSubmissionContext createApplicationSubmissionContext( Map acls = aclManager.toYARNACls(); if(dag != null) { - + DAGPlan dagPB = prepareAndCreateDAGPlan(dag, amConfig, tezJarResources, tezLrsAsArchive, sessionCreds, servicePluginsDescriptor, javaOptsChecker); @@ -713,38 +701,72 @@ public static ApplicationSubmissionContext createApplicationSubmissionContext( return appContext; } - + + static Credentials prepareAmLaunchCredentials(AMConfiguration amConfig, Credentials sessionCreds, + TezConfiguration conf, Path binaryConfPath) throws IOException { + // Setup security tokens + Credentials amLaunchCredentials = new Credentials(); + + // Add SimpleHistoryLoggingService logDir creds to the list of session credentials + // If it is on HDFS + String simpleHistoryLogDir = conf.get(TezConfiguration.TEZ_SIMPLE_HISTORY_LOGGING_DIR); + if (simpleHistoryLogDir != null && 
!simpleHistoryLogDir.isEmpty()) { + Path simpleHistoryLogDirPath = new Path(simpleHistoryLogDir); + TokenCache.obtainTokensForFileSystems(sessionCreds, new Path[] { simpleHistoryLogDirPath }, + conf); + } + + // Add Staging dir creds to the list of session credentials. + TokenCache.obtainTokensForFileSystems(sessionCreds, new Path[] {binaryConfPath }, conf); + + populateTokenCache(conf, sessionCreds); + + // Add session specific credentials to the AM credentials. + amLaunchCredentials.mergeAll(sessionCreds); + + if (amConfig.getCredentials() != null) { + amLaunchCredentials.mergeAll(amConfig.getCredentials()); + } + TezCommonUtils.logCredentials(LOG, amLaunchCredentials, "amLaunch"); + return amLaunchCredentials; + } + + //get secret keys and tokens and store them into TokenCache + private static void populateTokenCache(TezConfiguration conf, Credentials credentials) + throws IOException{ + // add the delegation tokens from configuration + String[] nameNodes = conf.getStrings(TezConfiguration.TEZ_JOB_FS_SERVERS); + LOG.debug("adding the following namenodes' delegation tokens:" + + Arrays.toString(nameNodes)); + if(nameNodes != null) { + Path[] ps = new Path[nameNodes.length]; + for(int i = 0; i < nameNodes.length; i++) { + ps[i] = new Path(nameNodes[i]); + } + TokenCache.obtainTokensForFileSystems(credentials, ps, conf); + } + } + static DAGPlan prepareAndCreateDAGPlan(DAG dag, AMConfiguration amConfig, Map tezJarResources, boolean tezLrsAsArchive, Credentials credentials, ServicePluginsDescriptor servicePluginsDescriptor, JavaOptsChecker javaOptsChecker) throws IOException { Credentials dagCredentials = setupDAGCredentials(dag, credentials, amConfig.getTezConfiguration()); + TezCommonUtils.logCredentials(LOG, dagCredentials, "dagPlan"); return dag.createDag(amConfig.getTezConfiguration(), dagCredentials, tezJarResources, amConfig.getBinaryConfLR(), tezLrsAsArchive, servicePluginsDescriptor, javaOptsChecker); } - - static void maybeAddDefaultLoggingJavaOpts(String logLevel, List vargs) { - Preconditions.checkNotNull(vargs); - if (!vargs.isEmpty()) { - for (String arg : vargs) { - if (arg.contains(TezConstants.TEZ_ROOT_LOGGER_NAME)) { - return; - } - } - } - TezClientUtils.addLog4jSystemProperties(logLevel, vargs); - } @Private public static String maybeAddDefaultLoggingJavaOpts(String logLevel, String javaOpts) { List vargs = new ArrayList(5); if (javaOpts != null) { - vargs.add(javaOpts); + Collections.addAll(vargs, javaOpts.split(" ")); } else { vargs.add(""); } - maybeAddDefaultLoggingJavaOpts(logLevel, vargs); + TezClientUtils.addLog4jSystemProperties(logLevel, vargs); if (vargs.size() == 1) { return vargs.get(0); } @@ -790,13 +812,25 @@ public static String addDefaultsToTaskLaunchCmdOpts(String vOpts, Configuration @VisibleForTesting public static void addLog4jSystemProperties(String logLevel, List vargs) { + Objects.requireNonNull(vargs); vargs.add("-Dlog4j.configuratorClass=org.apache.tez.common.TezLog4jConfigurator"); vargs.add("-Dlog4j.configuration=" + TezConstants.TEZ_CONTAINER_LOG4J_PROPERTIES_FILE); vargs.add("-D" + YarnConfiguration.YARN_APP_CONTAINER_LOG_DIR + "=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR); - vargs.add("-D" + TezConstants.TEZ_ROOT_LOGGER_NAME + "=" + logLevel - + "," + TezConstants.TEZ_CONTAINER_LOGGER_NAME); + boolean isRootLoggerPresent = false; + String rootLoggerArg = "-D" + TezConstants.TEZ_ROOT_LOGGER_NAME + "=" + logLevel + + "," + TezConstants.TEZ_CONTAINER_LOGGER_NAME; + for (int i = 0; i < vargs.size(); i++) { + String arg = vargs.get(i); + if 
(arg.contains(TezConstants.TEZ_ROOT_LOGGER_NAME)) { + vargs.set(i, rootLoggerArg); + isRootLoggerPresent = true; + } + } + if (!isRootLoggerPresent) { + vargs.add(rootLoggerArg); + } } static ConfigurationProto createFinalConfProtoForApp(Configuration amConf, @@ -804,10 +838,16 @@ static ConfigurationProto createFinalConfProtoForApp(Configuration amConf, assert amConf != null; ConfigurationProto.Builder builder = ConfigurationProto.newBuilder(); for (Entry entry : amConf) { - PlanKeyValuePair.Builder kvp = PlanKeyValuePair.newBuilder(); - kvp.setKey(entry.getKey()); - kvp.setValue(amConf.get(entry.getKey())); - builder.addConfKeyValues(kvp); + String key = entry.getKey(); + String val = amConf.get(key); + if(val != null) { + PlanKeyValuePair.Builder kvp = PlanKeyValuePair.newBuilder(); + kvp.setKey(key); + kvp.setValue(val); + builder.addConfKeyValues(kvp); + } else { + LOG.debug("null value in Configuration after replacement for key={}. Skipping.", key); + } } AMPluginDescriptorProto pluginDescriptorProto = @@ -871,13 +911,12 @@ private static Path localizeDagPlanAsText(DAGPlan dagPB, FileSystem fs, AMConfig return textPath; } - static DAGClientAMProtocolBlockingPB getAMProxy(FrameworkClient yarnClient, - Configuration conf, - ApplicationId applicationId) throws TezException, IOException { + static DAGClientAMProtocolBlockingPB getAMProxy(FrameworkClient frameworkClient, + Configuration conf, ApplicationId applicationId, UserGroupInformation ugi) + throws TezException, IOException { ApplicationReport appReport; try { - appReport = yarnClient.getApplicationReport( - applicationId); + appReport = frameworkClient.getApplicationReport(applicationId); if(appReport == null) { throw new TezUncheckedException("Could not retrieve application report" @@ -907,25 +946,23 @@ static DAGClientAMProtocolBlockingPB getAMProxy(FrameworkClient yarnClient, } catch (YarnException e) { throw new TezException(e); } - return getAMProxy(conf, appReport.getHost(), - appReport.getRpcPort(), appReport.getClientToAMToken()); + + return getAMProxy(conf, appReport.getHost(), appReport.getRpcPort(), + appReport.getClientToAMToken(), ugi); } @Private public static DAGClientAMProtocolBlockingPB getAMProxy(final Configuration conf, String amHost, - int amRpcPort, org.apache.hadoop.yarn.api.records.Token clientToAMToken) throws IOException { + int amRpcPort, org.apache.hadoop.yarn.api.records.Token clientToAMToken, + UserGroupInformation userUgi) throws IOException { final InetSocketAddress serviceAddr = NetUtils.createSocketAddrForHost(amHost, amRpcPort); - UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(UserGroupInformation - .getCurrentUser().getUserName()); if (clientToAMToken != null) { Token token = ConverterUtils.convertFromYarn(clientToAMToken, serviceAddr); userUgi.addToken(token); } - if (LOG.isDebugEnabled()) { - LOG.debug("Connecting to Tez AM at " + serviceAddr); - } + LOG.debug("Connecting to Tez AM at {}", serviceAddr); DAGClientAMProtocolBlockingPB proxy = null; try { proxy = userUgi.doAs(new PrivilegedExceptionAction() { @@ -969,11 +1006,11 @@ public static String maybeAddDefaultMemoryJavaOpts(String javaOpts, Resource res return javaOpts; } - if ((maxHeapFactor <= 0 && !Precision.equals(maxHeapFactor, -1, 0.01)) || maxHeapFactor >= 1) { + if ((maxHeapFactor <= 0 && Double.valueOf("-1") != maxHeapFactor) || maxHeapFactor >= 1) { return javaOpts; } - if (Precision.equals(maxHeapFactor, -1, 0.01)) { + if (Double.valueOf("-1") == maxHeapFactor) { maxHeapFactor = resource.getMemory() < 
TezConstants.TEZ_CONTAINER_SMALL_SLAB_BOUND_MB ? TezConstants.TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION_SMALL_SLAB : TezConstants.TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION_LARGE_SLAB; @@ -1075,7 +1112,7 @@ public static void setApplicationPriority(ApplicationSubmissionContext context, int priority = amConfig.getTezConfiguration().getInt(TezConfiguration.TEZ_AM_APPLICATION_PRIORITY, 0); context.setPriority(Priority.newInstance(priority)); if (LOG.isDebugEnabled()) { - LOG.debug("Settting TEZ application priority, applicationId= " + context.getApplicationId() + + LOG.debug("Setting TEZ application priority, applicationId= " + context.getApplicationId() + ", priority= " + context.getPriority().getPriority()); } } @@ -1086,7 +1123,7 @@ public static byte[] getLocalSha(Path path, Configuration conf) throws IOExcepti InputStream is = null; try { is = FileSystem.getLocal(conf).open(path); - return DigestUtils.sha256(is); + return DigestUtils.sha384(is); } finally { if (is != null) { is.close(); @@ -1098,7 +1135,7 @@ public static byte[] getResourceSha(URI uri, Configuration conf) throws IOExcept InputStream is = null; try { is = FileSystem.get(uri, conf).open(new Path(uri)); - return DigestUtils.sha256(is); + return DigestUtils.sha384(is); } finally { if (is != null) { is.close(); diff --git a/tez-api/src/main/java/org/apache/tez/client/TezYarnClient.java b/tez-api/src/main/java/org/apache/tez/client/TezYarnClient.java index 2a0c79ab65..d109648075 100644 --- a/tez-api/src/main/java/org/apache/tez/client/TezYarnClient.java +++ b/tez-api/src/main/java/org/apache/tez/client/TezYarnClient.java @@ -40,13 +40,16 @@ public class TezYarnClient extends FrameworkClient { private volatile boolean isRunning; + private String amHost; + private int amPort; + protected TezYarnClient(YarnClient yarnClient) { this.yarnClient = yarnClient; } @Override - public void init(TezConfiguration tezConf, YarnConfiguration yarnConf) { - yarnClient.init(yarnConf); + public void init(TezConfiguration tezConf) { + yarnClient.init(new YarnConfiguration(tezConf)); } @Override @@ -100,6 +103,8 @@ public ApplicationReport getApplicationReport(ApplicationId appId) throws YarnEx throw new ApplicationNotFoundException("YARN reports no state for application " + appId); } + this.amHost = report.getHost(); + this.amPort = report.getRpcPort(); return report; } @@ -107,4 +112,14 @@ public ApplicationReport getApplicationReport(ApplicationId appId) throws YarnEx public boolean isRunning() throws IOException { return isRunning; } + + @Override + public String getAmHost() { + return amHost; + } + + @Override + public int getAmPort() { + return amPort; + } } diff --git a/tez-api/src/main/java/org/apache/tez/common/ATSConstants.java b/tez-api/src/main/java/org/apache/tez/common/ATSConstants.java index 6e07849341..e3c90d3154 100644 --- a/tez-api/src/main/java/org/apache/tez/common/ATSConstants.java +++ b/tez-api/src/main/java/org/apache/tez/common/ATSConstants.java @@ -107,6 +107,10 @@ public class ATSConstants { public static final String COUNTER_NAME = "counterName"; public static final String COUNTER_DISPLAY_NAME = "counterDisplayName"; public static final String COUNTER_VALUE = "counterValue"; + public static final String COUNTER_MIN_VALUE = "counterMinValue"; + public static final String COUNTER_MAX_VALUE = "counterMaxValue"; + public static final String COUNTER_INSTANCE_COUNT = "counterInstanceCount"; + /* Url related */ public static final String RESOURCE_URI_BASE = "/ws/v1/timeline"; @@ -134,4 +138,5 @@ public class ATSConstants { public static 
final String CALLER_TYPE = "callerType"; public static final String DESCRIPTION = "description"; + protected ATSConstants() {} } diff --git a/tez-api/src/main/java/org/apache/tez/common/CachedEntity.java b/tez-api/src/main/java/org/apache/tez/common/CachedEntity.java new file mode 100644 index 0000000000..47ccd77e48 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/common/CachedEntity.java @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.common; + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.MonotonicClock; + +/** + * A thread safe implementation used as a container for cacheable entries with Expiration times. + * It supports custom {@link Clock} to control the elapsed time calculation. + * @param the data object type. + */ +public class CachedEntity { + private final AtomicReference entryDataRef; + private final Clock cacheClock; + private final long expiryDurationMS; + private volatile long entryTimeStamp; + + public CachedEntity(TimeUnit expiryTimeUnit, long expiryLength, Clock clock) { + entryDataRef = new AtomicReference<>(null); + cacheClock = clock; + expiryDurationMS = TimeUnit.MILLISECONDS.convert(expiryLength, expiryTimeUnit); + entryTimeStamp = 0; + } + + public CachedEntity(TimeUnit expiryTimeUnit, long expiryLength) { + this(expiryTimeUnit, expiryLength, new MonotonicClock()); + } + + /** + * + * @return true if expiration timestamp is 0, or the elapsed time since last update is + * greater than {@link #expiryDurationMS} + */ + public boolean isExpired() { + return (entryTimeStamp == 0) + || ((cacheClock.getTime() - entryTimeStamp) > expiryDurationMS); + } + + /** + * If the entry has expired, it reset the cache reference through {@link #clearExpiredEntry()}. + * @return cached data if the timestamp is valid. Null, if the timestamp has expired. + */ + public T getValue() { + if (isExpired()) { // quick check for expiration + if (clearExpiredEntry()) { // remove reference to the expired entry + return null; + } + } + return entryDataRef.get(); + } + + /** + * Safely sets the cached data. + * @param newEntry + */ + public void setValue(T newEntry) { + T currentEntry = entryDataRef.get(); + while (!entryDataRef.compareAndSet(currentEntry, newEntry)) { + currentEntry = entryDataRef.get(); + } + entryTimeStamp = cacheClock.getTime(); + } + + /** + * Enforces the expiration of the cached entry. + */ + public void enforceExpiration() { + entryTimeStamp = 0; + } + + /** + * Safely deletes the reference to the data if it was not null. + * @return true if the reference is set to Null. False indicates that another thread + * updated the cache. 
+ */ + private boolean clearExpiredEntry() { + T currentEntry = entryDataRef.get(); + if (currentEntry == null) { + return true; + } + // the current value is not null: try to reset it. + // if the CAS is successful, then we won't override a recent update to the cache. + return (entryDataRef.compareAndSet(currentEntry, null)); + } +} diff --git a/tez-api/src/main/java/org/apache/tez/common/GuavaShim.java b/tez-api/src/main/java/org/apache/tez/common/GuavaShim.java new file mode 100644 index 0000000000..d9b8796d61 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/common/GuavaShim.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.common; + +import com.google.common.util.concurrent.MoreExecutors; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.concurrent.Executor; + +/** + * A interoperability layer to work with multiple versions of guava. + */ +public final class GuavaShim { + + static { + try { + executorMethod = MoreExecutors.class.getDeclaredMethod("directExecutor"); + } catch (NoSuchMethodException nsme) { + try { + executorMethod = MoreExecutors.class.getDeclaredMethod("sameThreadExecutor"); + } catch (NoSuchMethodException nsmeSame) { + } + } + } + + private GuavaShim() { + } + + private static Method executorMethod; + + public static Executor directExecutor() { + try { + return (Executor) executorMethod.invoke(null); + } catch (IllegalAccessException | IllegalArgumentException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/tez-api/src/main/java/org/apache/tez/common/JavaOptsChecker.java b/tez-api/src/main/java/org/apache/tez/common/JavaOptsChecker.java index 6de402aae9..3e93446149 100644 --- a/tez-api/src/main/java/org/apache/tez/common/JavaOptsChecker.java +++ b/tez-api/src/main/java/org/apache/tez/common/JavaOptsChecker.java @@ -38,9 +38,8 @@ public class JavaOptsChecker { public void checkOpts(String opts) throws TezException { Set gcOpts = new TreeSet(); - if (LOG.isDebugEnabled()) { - LOG.debug("Checking JVM GC opts: " + opts); - } + LOG.debug("Checking JVM GC opts: {}", opts); + Matcher matcher = pattern.matcher(opts); while (matcher.find()) { if (matcher.groupCount() != 3) { @@ -74,10 +73,8 @@ public void checkOpts(String opts) throws TezException { } } - if (LOG.isDebugEnabled()) { - LOG.debug("Found clashing GC opts" - + ", conflicting GC Values=" + gcOpts); - } + LOG.debug("Found clashing GC opts, conflicting GC Values={}", gcOpts); + throw new TezException("Invalid/conflicting GC options found," + " cmdOpts=\"" + opts + "\""); } diff --git a/tez-api/src/main/java/org/apache/tez/common/Preconditions.java 
b/tez-api/src/main/java/org/apache/tez/common/Preconditions.java new file mode 100644 index 0000000000..b32d951101 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/common/Preconditions.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.common; + +import javax.annotation.Nullable; + +/** + * A simplified version of Guava's Preconditions for making it easy to handle its usage in Tez project. + */ +//TODO remove this and make Preconditions class in common module available everywhere +public class Preconditions { + + private Preconditions() { + } + + public static void checkArgument(boolean expression) { + if (!expression) { + throw new IllegalArgumentException(); + } + } + + public static void checkArgument(boolean expression, @Nullable Object message) { + if (!expression) { + throw new IllegalArgumentException(String.valueOf(message)); + } + } + + public static void checkArgument(boolean expression, @Nullable String template, @Nullable Object... args) { + if (!expression) { + throw new IllegalArgumentException(format(template, args)); + } + } + + public static void checkState(boolean expression) { + if (!expression) { + throw new IllegalStateException(); + } + } + + public static void checkState(boolean expression, @Nullable Object message) { + if (!expression) { + throw new IllegalStateException(String.valueOf(message)); + } + } + + public static void checkState(boolean expression, @Nullable String template, @Nullable Object... args) { + if (!expression) { + throw new IllegalStateException(format(template, args)); + } + } + + private static String format(@Nullable String template, @Nullable Object... 
args) { + template = String.valueOf(template); // null -> "null" + + if (args == null) { + args = new Object[] { "(Object[])null" }; + } else { + for (int i = 0; i < args.length; i++) { + args[i] = lenientToString(args[i]); + } + } + + // start substituting the arguments into the '%s' placeholders + StringBuilder builder = new StringBuilder(template.length() + 16 * args.length); + int templateStart = 0; + int i = 0; + while (i < args.length) { + int placeholderStart = template.indexOf("%s", templateStart); + if (placeholderStart == -1) { + break; + } + builder.append(template, templateStart, placeholderStart); + builder.append(args[i++]); + templateStart = placeholderStart + 2; + } + builder.append(template, templateStart, template.length()); + + // if we run out of placeholders, append the extra args in square braces + if (i < args.length) { + builder.append(" ["); + builder.append(args[i++]); + while (i < args.length) { + builder.append(", "); + builder.append(args[i++]); + } + builder.append(']'); + } + + return builder.toString(); + } + + private static String lenientToString(@Nullable Object o) { + try { + return String.valueOf(o); + } catch (Exception e) { + String objectToString = o.getClass().getName() + '@' + Integer.toHexString(System.identityHashCode(o)); + return "<" + objectToString + " threw " + e.getClass().getName() + ">"; + } + } +} diff --git a/tez-api/src/main/java/org/apache/tez/common/ProgressHelper.java b/tez-api/src/main/java/org/apache/tez/common/ProgressHelper.java index 407a20e299..289847a96a 100644 --- a/tez-api/src/main/java/org/apache/tez/common/ProgressHelper.java +++ b/tez-api/src/main/java/org/apache/tez/common/ProgressHelper.java @@ -19,71 +19,151 @@ package org.apache.tez.common; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.atomic.AtomicReference; import org.apache.tez.runtime.api.AbstractLogicalInput; import org.apache.tez.runtime.api.LogicalInput; import org.apache.tez.runtime.api.ProcessorContext; -import org.apache.tez.runtime.api.ProgressFailedException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import java.util.Map; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; public class ProgressHelper { - private static final Logger LOG = LoggerFactory.getLogger(ProgressHelper.class); - private String processorName; + private static final Logger LOG = + LoggerFactory.getLogger(ProgressHelper.class); + private static final float MIN_PROGRESS_VAL = 0.0f; + private static final float MAX_PROGRESS_VAL = 1.0f; + private final String processorName; protected final Map inputs; - final ProcessorContext processorContext; + private final ProcessorContext processorContext; + private final AtomicReference> periodicMonitorTaskRef; + private long monitorExecPeriod; + private volatile ScheduledExecutorService scheduledExecutorService; - volatile ScheduledExecutorService scheduledExecutorService; - Runnable monitorProgress = new Runnable() { - @Override - public void run() { - try { - float progSum = 0.0f; - float progress; - if (inputs != null && inputs.size() != 0) { - for (LogicalInput input : inputs.values()) { - if (input instanceof AbstractLogicalInput) { - progSum += ((AbstractLogicalInput) input).getProgress(); + public static final float processProgress(float val) { + return (Float.isNaN(val)) ? 
MIN_PROGRESS_VAL
+        : Math.max(MIN_PROGRESS_VAL, Math.min(MAX_PROGRESS_VAL, val));
+  }
+
+  public static final boolean isProgressWithinRange(float val) {
+    return (val <= MAX_PROGRESS_VAL && val >= MIN_PROGRESS_VAL);
+  }
+
+  public ProgressHelper(Map<String, LogicalInput> inputsParam,
+      ProcessorContext context, String processorName) {
+    this.periodicMonitorTaskRef = new AtomicReference<>(null);
+    this.inputs = inputsParam;
+    this.processorContext = context;
+    this.processorName = processorName;
+  }
+
+  public void scheduleProgressTaskService(long delay, long period) {
+    monitorExecPeriod = period;
+    scheduledExecutorService =
+        Executors.newScheduledThreadPool(1,
+            new ThreadFactoryBuilder().setDaemon(true).setNameFormat(
+                "TaskProgressService{" + processorName + ":" + processorContext
+                    .getTaskVertexName()
+                    + "} #%d").build());
+    try {
+      createPeriodicTask(delay);
+    } catch (RejectedExecutionException | IllegalArgumentException ex) {
+      LOG.error("Could not create periodic scheduled task for processor={}",
+          processorName, ex);
+    }
+  }
+
+  private Runnable createRunnableMonitor() {
+    return new Runnable() {
+      @Override
+      public void run() {
+        try {
+          float progSum = MIN_PROGRESS_VAL;
+          int invalidInput = 0;
+          float progressVal = MIN_PROGRESS_VAL;
+          if (inputs != null && !inputs.isEmpty()) {
+            for (LogicalInput input : inputs.values()) {
+              if (!(input instanceof AbstractLogicalInput)) {
+                /**
+                 * According to the javadoc in
+                 * {@link org.apache.tez.runtime.api.AbstractLogicalInput} all
+                 * implementations must extend AbstractLogicalInput.
+                 */
+                continue;
+              }
+              final float inputProgress =
+                  ((AbstractLogicalInput) input).getProgress();
+              if (!isProgressWithinRange(inputProgress)) {
+                final int invalidSnapshot = ++invalidInput;
+                if (LOG.isDebugEnabled()) {
+                  LOG.debug(
+                      "progress update: Incorrect value in progress helper in "
+                          + "processor={}, inputProgress={}, inputsSize={}, "
+                          + "invalidInput={}",
+                      processorName, inputProgress, inputs.size(),
+                      invalidSnapshot);
+                }
+              }
+              progSum += processProgress(inputProgress);
             }
+            // No need to process the average within the valid range since the
+            // processorContext validates the value before being set.
+            progressVal = progSum / inputs.size();
+          }
+          // Report progress as 0.0f if there are errors.
+          processorContext.setProgress(progressVal);
+        } catch (Throwable th) {
+          LOG.debug("progress update: Encountered exception during"
+              + " Processor={}", processorName, th);
+          if (th instanceof InterruptedException) {
+            // set the interrupt flag to true and exit
+            Thread.currentThread().interrupt();
+            return;
           }
-          progress = (1.0f) * progSum / inputs.size();
-        } else {
-          progress = 1.0f;
         }
-        processorContext.setProgress(progress);
-      } catch (ProgressFailedException pe) {
-        LOG.warn("Encountered ProgressFailedException during Processor progress update"
-            + pe);
-      } catch (InterruptedException ie) {
-        LOG.warn("Encountered InterruptedException during Processor progress update"
-            + ie);
       }
-    }
-  };
+    };
+  }
 
-  public ProgressHelper(Map<String, LogicalInput> _inputs, ProcessorContext context, String processorName) {
-    this.inputs = _inputs;
-    this.processorContext = context;
-    this.processorName = processorName;
+  private boolean createPeriodicTask(long delay)
+      throws RejectedExecutionException, IllegalArgumentException {
+    stopPeriodicMonitor();
+    final Runnable runnableMonitor = createRunnableMonitor();
+    ScheduledFuture<?> futureTask = scheduledExecutorService
+        .scheduleWithFixedDelay(runnableMonitor, delay, monitorExecPeriod,
+            TimeUnit.MILLISECONDS);
+    periodicMonitorTaskRef.set(futureTask);
+    return true;
   }
 
-  public void scheduleProgressTaskService(long delay, long period) {
-    scheduledExecutorService = Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder()
-        .setDaemon(true).setNameFormat("TaskProgressService{" + processorName+ ":" + processorContext.getTaskVertexName()
-        + "} #%d").build());
-    scheduledExecutorService.scheduleWithFixedDelay(monitorProgress, delay, period,
-        TimeUnit.MILLISECONDS);
+  private void stopPeriodicMonitor() {
+    ScheduledFuture<?> scheduledMonitorRes =
+        this.periodicMonitorTaskRef.get();
+    if (scheduledMonitorRes != null && !scheduledMonitorRes.isCancelled()) {
+      scheduledMonitorRes.cancel(true);
+      this.periodicMonitorTaskRef.set(null);
+    }
   }
 
   public void shutDownProgressTaskService() {
+    stopPeriodicMonitor();
     if (scheduledExecutorService != null) {
+      scheduledExecutorService.shutdown();
+      try {
+        if (!scheduledExecutorService.awaitTermination(monitorExecPeriod,
+            TimeUnit.MILLISECONDS)) {
+          scheduledExecutorService.shutdownNow();
+        }
+      } catch (InterruptedException e) {
+        LOG.debug("Interrupted exception while shutting down the "
+            + "executor service for the processor name={}", processorName);
+      }
       scheduledExecutorService.shutdownNow();
-      scheduledExecutorService = null;
     }
+    scheduledExecutorService = null;
   }
-
 }
diff --git a/tez-api/src/main/java/org/apache/tez/common/RPCUtil.java b/tez-api/src/main/java/org/apache/tez/common/RPCUtil.java
index caeb822bc7..ab265f6ea8 100644
--- a/tez-api/src/main/java/org/apache/tez/common/RPCUtil.java
+++ b/tez-api/src/main/java/org/apache/tez/common/RPCUtil.java
@@ -24,12 +24,15 @@
 import org.apache.hadoop.ipc.RemoteException;
 
 import org.apache.tez.dag.api.DAGNotRunningException;
+import org.apache.tez.dag.api.NoCurrentDAGException;
 import org.apache.tez.dag.api.SessionNotRunning;
 import org.apache.tez.dag.api.TezException;
 
 import com.google.protobuf.ServiceException;
 
-public class RPCUtil {
+public final class RPCUtil {
+
+  private RPCUtil() {}
 
   /**
    * Returns an instance of {@link TezException}
@@ -55,17 +58,8 @@ private static T instantiateException(
       return ex;
       // RemoteException contains useful information as against the
       // java.lang.reflect exceptions.
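// ---------------------------------------------------------------------------
// Behavior sketch, not part of the patch: the reworked ProgressHelper above
// sanitizes whatever the inputs report before averaging, so a NaN or
// out-of-range value from one input can no longer propagate to the framework.
import org.apache.tez.common.ProgressHelper;

class ProgressClampingSketch {
  public static void main(String[] args) {
    System.out.println(ProgressHelper.processProgress(Float.NaN));   // 0.0
    System.out.println(ProgressHelper.processProgress(-0.5f));       // 0.0
    System.out.println(ProgressHelper.processProgress(0.42f));       // 0.42
    System.out.println(ProgressHelper.processProgress(7.0f));        // 1.0
    System.out.println(ProgressHelper.isProgressWithinRange(1.2f));  // false
  }
}
// ---------------------------------------------------------------------------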
- } catch (NoSuchMethodException e) { - throw re; - } catch (IllegalArgumentException e) { - throw re; - } catch (SecurityException e) { - throw re; - } catch (InstantiationException e) { - throw re; - } catch (IllegalAccessException e) { - throw re; - } catch (InvocationTargetException e) { + } catch (NoSuchMethodException | IllegalArgumentException | SecurityException | InstantiationException + | IllegalAccessException | InvocationTargetException e) { throw re; } } @@ -85,12 +79,6 @@ private static T instantiateRuntimeException( return instantiateException(cls, re); } - private static T instantiateSessionNotRunningException( - Class cls, RemoteException re) throws RemoteException { - return instantiateException(cls, re); - } - - /** * Utility method that unwraps and returns appropriate exceptions. * @@ -109,7 +97,7 @@ public static Void unwrapAndThrowException(ServiceException se) } else { if (cause instanceof RemoteException) { RemoteException re = (RemoteException) cause; - Class realClass = null; + Class realClass; try { realClass = Class.forName(re.getClassName()); } catch (ClassNotFoundException cnf) { @@ -125,6 +113,9 @@ public static Void unwrapAndThrowException(ServiceException se) } else if (DAGNotRunningException.class.isAssignableFrom(realClass)) { throw instantiateTezException( realClass.asSubclass(DAGNotRunningException.class), re); + } else if (NoCurrentDAGException.class.isAssignableFrom(realClass)) { + throw instantiateTezException( + realClass.asSubclass(NoCurrentDAGException.class), re); } else if (TezException.class.isAssignableFrom(realClass)) { throw instantiateTezException( realClass.asSubclass(TezException.class), re); diff --git a/tez-api/src/main/java/org/apache/tez/common/ReflectionUtils.java b/tez-api/src/main/java/org/apache/tez/common/ReflectionUtils.java index 4d89ed4f9d..5bfb41c586 100644 --- a/tez-api/src/main/java/org/apache/tez/common/ReflectionUtils.java +++ b/tez-api/src/main/java/org/apache/tez/common/ReflectionUtils.java @@ -19,22 +19,21 @@ package org.apache.tez.common; import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.net.URL; -import java.net.URLClassLoader; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.tez.dag.api.TezReflectionException; -import org.apache.tez.dag.api.TezUncheckedException; @Private -public class ReflectionUtils { +public final class ReflectionUtils { - private static final Map> CLAZZ_CACHE = new ConcurrentHashMap>(); + private static final Map> CLAZZ_CACHE = new ConcurrentHashMap<>(); + + private ReflectionUtils() {} @Private public static Class getClazz(String className) throws TezReflectionException { @@ -109,43 +108,11 @@ public static Method getMethod(Class targetClazz, String methodName, Class } } - @Private - public static synchronized void addResourcesToClasspath(List urls) { - ClassLoader classLoader = new URLClassLoader(urls.toArray(new URL[urls.size()]), Thread - .currentThread().getContextClassLoader()); - Thread.currentThread().setContextClassLoader(classLoader); - } - - // Parameters for addResourcesToSystemClassLoader - private static final Class[] parameters = new Class[]{URL.class}; - private static Method sysClassLoaderMethod = null; - @Private public static synchronized void addResourcesToSystemClassLoader(List urls) { - URLClassLoader sysLoader = (URLClassLoader)ClassLoader.getSystemClassLoader(); 
-    if (sysClassLoaderMethod == null) {
-      Class sysClass = URLClassLoader.class;
-      Method method;
-      try {
-        method = sysClass.getDeclaredMethod("addURL", parameters);
-      } catch (SecurityException e) {
-        throw new TezUncheckedException("Failed to get handle on method addURL", e);
-      } catch (NoSuchMethodException e) {
-        throw new TezUncheckedException("Failed to get handle on method addURL", e);
-      }
-      method.setAccessible(true);
-      sysClassLoaderMethod = method;
-    }
+    TezClassLoader classLoader = TezClassLoader.getInstance();
     for (URL url : urls) {
-      try {
-        sysClassLoaderMethod.invoke(sysLoader, new Object[] { url });
-      } catch (IllegalArgumentException e) {
-        throw new TezUncheckedException("Failed to invoke addURL for rsrc: " + url, e);
-      } catch (IllegalAccessException e) {
-        throw new TezUncheckedException("Failed to invoke addURL for rsrc: " + url, e);
-      } catch (InvocationTargetException e) {
-        throw new TezUncheckedException("Failed to invoke addURL for rsrc: " + url, e);
-      }
+      classLoader.addURL(url);
     }
   }
 }
diff --git a/tez-api/src/main/java/org/apache/tez/common/TezClassLoader.java b/tez-api/src/main/java/org/apache/tez/common/TezClassLoader.java
new file mode 100644
index 0000000000..53c9e51fc6
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/common/TezClassLoader.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.common;
+
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
+import org.apache.hadoop.conf.Configuration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * ClassLoader that allows new paths to be added to the classpath at runtime.
+ *
+ * It uses a URLClassLoader with this class's own classloader as the parent,
+ * so resource loading is delegated to the parent first and then to the added
+ * URLs. The process is set up by invoking setupTezClassLoader(), which makes
+ * the global TezClassLoader the context class loader of the current thread.
+ * All threads created afterwards inherit it and therefore resolve classes and
+ * resources through TezClassLoader.
+ */
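// ---------------------------------------------------------------------------
// Usage sketch, not part of the patch: the intended lifecycle of the
// TezClassLoader defined below, which replaces the reflective
// URLClassLoader.addURL hack removed from ReflectionUtils above. The jar path
// and class name are hypothetical.
import java.io.File;
import java.net.URL;
import org.apache.tez.common.TezClassLoader;

class TezClassLoaderSketch {
  public static void main(String[] args) throws Exception {
    TezClassLoader.setupTezClassLoader();           // install as context loader
    URL extraJar = new File("/tmp/extra.jar").toURI().toURL();
    TezClassLoader.getInstance().addURL(extraJar);  // visible to all threads
    Class<?> plugin = Thread.currentThread().getContextClassLoader()
        .loadClass("com.example.HypotheticalPlugin");
    System.out.println("Loaded " + plugin.getName());
  }
}
// ---------------------------------------------------------------------------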
+public class TezClassLoader extends URLClassLoader {
+  private static final TezClassLoader INSTANCE;
+  private static final Logger LOG = LoggerFactory.getLogger(TezClassLoader.class);
+
+  static {
+    INSTANCE = AccessController.doPrivileged(new PrivilegedAction<TezClassLoader>() {
+      public TezClassLoader run() {
+        return new TezClassLoader();
+      }
+    });
+  }
+
+  private TezClassLoader() {
+    super(new URL[] {}, TezClassLoader.class.getClassLoader());
+
+    LOG.info(
+        "Created TezClassLoader with parent classloader: {}, thread: {}, system classloader: {}",
+        TezClassLoader.class.getClassLoader(), Thread.currentThread().getId(),
+        ClassLoader.getSystemClassLoader());
+  }
+
+  public void addURL(URL url) {
+    super.addURL(url);
+  }
+
+  public static TezClassLoader getInstance() {
+    return INSTANCE;
+  }
+
+  public static void setupTezClassLoader() {
+    LOG.debug(
+        "Setting up TezClassLoader: thread: {}, current thread classloader: {} system classloader: {}",
+        Thread.currentThread().getId(), Thread.currentThread().getContextClassLoader(),
+        ClassLoader.getSystemClassLoader());
+    Thread.currentThread().setContextClassLoader(INSTANCE);
+  }
+
+  public static void setupForConfiguration(Configuration configuration) {
+    configuration.setClassLoader(INSTANCE);
+  }
+}
diff --git a/tez-api/src/main/java/org/apache/tez/common/TezCommonUtils.java b/tez-api/src/main/java/org/apache/tez/common/TezCommonUtils.java
index 9cb76d9007..28799c1192 100644
--- a/tez-api/src/main/java/org/apache/tez/common/TezCommonUtils.java
+++ b/tez-api/src/main/java/org/apache/tez/common/TezCommonUtils.java
@@ -22,10 +22,10 @@
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.StringTokenizer;
+import java.util.stream.Collectors;
 import java.util.zip.Deflater;
 import java.util.zip.DeflaterOutputStream;
 import java.util.zip.Inflater;
@@ -38,6 +38,7 @@
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
@@ -55,7 +56,7 @@
 import com.google.protobuf.ByteString;
 
 @Private
-public class TezCommonUtils {
+public final class TezCommonUtils {
   public static final FsPermission TEZ_AM_DIR_PERMISSION = FsPermission
       .createImmutable((short) 0700); // rwx--------
   public static final FsPermission TEZ_AM_FILE_PERMISSION = FsPermission
@@ -64,6 +65,8 @@ public class TezCommonUtils {
 
   public static final String TEZ_SYSTEM_SUB_DIR = ".tez";
 
+  private TezCommonUtils() {}
+
   /**
    *
<p>
* This function returns the staging directory defined in the config with @@ -73,7 +76,7 @@ public class TezCommonUtils { * function makes sure if the staging directory exists. If not, it creates the * directory with permission TEZ_AM_DIR_PERMISSION. *
<p>
- * + * * @param conf * TEZ configuration * @return Fully qualified staging directory @@ -103,7 +106,7 @@ public static Path getTezBaseStagingPath(Configuration conf) { * sub-directory (TEZ_SYSTEM_SUB_DIR/) under the base * staging directory, often provided by user. *
<p>
- * + * * @param conf * Tez configuration * @param strAppId @@ -134,7 +137,7 @@ public static Path createTezSystemStagingPath(Configuration conf, String strAppI * its temporary files under this sub-directory. The function normally doesn't * creates any sub-directory under the base staging directory. *
<p>
- * + * * @param conf * Tez configuration * @param strAppId @@ -154,7 +157,7 @@ public static Path getTezSystemStagingPath(Configuration conf, String strAppId) *
<p>
* Returns a path to store binary configuration *
<p>
- * + * * @param tezSysStagingPath * TEZ system level staging directory used for Tez internals * @return path to configuration @@ -168,7 +171,7 @@ public static Path getTezConfStagingPath(Path tezSysStagingPath) { *
<p>
* Returns a path to store local resources/session jars *
<p>
- * + * * @param tezSysStagingPath * TEZ system level staging directory used for Tez internals * @return path to store the session jars @@ -182,7 +185,7 @@ public static Path getTezAMJarStagingPath(Path tezSysStagingPath) { *
<p>
* Returns a path to store binary plan *
<p>
- * + * * @param tezSysStagingPath * TEZ system level staging directory used for Tez internals * @return path to store the plan in binary @@ -196,7 +199,7 @@ public static Path getTezBinPlanStagingPath(Path tezSysStagingPath) { *
<p>
* Returns a path to store text plan *
<p>
- * + * * @param tezSysStagingPath * TEZ system level staging directory used for Tez internals * @param strAppId @@ -216,28 +219,27 @@ public static Path getTezTextPlanStagingPath(Path tezSysStagingPath, String strA *
<p>
* Returns a path to store recovery information *
<p>
- * + * * @param tezSysStagingPath * TEZ system level staging directory used for Tez internals * @param conf * Tez configuration * @return App recovery path - * @throws IOException */ @Private public static Path getRecoveryPath(Path tezSysStagingPath, Configuration conf) throws IOException { - Path baseReecoveryPath = new Path(tezSysStagingPath, + Path baseRecoveryPath = new Path(tezSysStagingPath, TezConstants.DAG_RECOVERY_DATA_DIR_NAME); - FileSystem recoveryFS = baseReecoveryPath.getFileSystem(conf); - return recoveryFS.makeQualified(baseReecoveryPath); + FileSystem recoveryFS = baseRecoveryPath.getFileSystem(conf); + return recoveryFS.makeQualified(baseRecoveryPath); } /** *
<p>
* Returns a path to store app attempt specific recovery details *
<p>
- * + * * @param recoveryPath * TEZ recovery directory used for Tez internals * @param attemptID @@ -253,7 +255,7 @@ public static Path getAttemptRecoveryPath(Path recoveryPath, int attemptID) { *
<p>
* Returns a path to store DAG specific recovery info *
<p>
- * + * * @param attemptRecoverPath * :TEZ system level staging directory used for Tez internals * @param dagID @@ -269,7 +271,7 @@ public static Path getDAGRecoveryPath(Path attemptRecoverPath, String dagID) { *
<p>
* Returns a path to store summary info for recovery *
<p>
- * + * * @param attemptRecoverPath * TEZ system level staging directory used for Tez internals * @return Summary event path used in recovery @@ -283,15 +285,22 @@ public static Path getSummaryRecoveryPath(Path attemptRecoverPath) { *
<p>
* Create a directory with predefined directory permission *
<p>
- * + * * @param fs * Filesystem * @param dir * directory to be created - * @throws IOException */ public static void mkDirForAM(FileSystem fs, Path dir) throws IOException { - fs.mkdirs(dir, new FsPermission(TEZ_AM_DIR_PERMISSION)); + FsPermission perm = new FsPermission(TEZ_AM_DIR_PERMISSION); + fs.mkdirs(dir, perm); + FileStatus fileStatus = fs.getFileStatus(dir); + if (!fileStatus.getPermission().equals(perm)) { + LOG.warn("Directory " + dir.toString() + " created with unexpected permissions : " + + fileStatus.getPermission() + ". Fixing permissions to correct value : " + + perm.toString()); + fs.setPermission(dir, perm); + } } /** @@ -299,18 +308,17 @@ public static void mkDirForAM(FileSystem fs, Path dir) throws IOException { * Create a file with TEZ_AM_FILE_PERMISSION permission and * returns OutputStream *
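// ---------------------------------------------------------------------------
// Composition sketch, not part of the patch: how the TezCommonUtils path
// helpers documented above chain together. The application and DAG ids are
// hypothetical, and the printed layout is approximate.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.tez.common.TezCommonUtils;

class RecoveryPathSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path sysStaging = TezCommonUtils.getTezSystemStagingPath(conf, "application_1_0001");
    Path recoveryRoot = TezCommonUtils.getRecoveryPath(sysStaging, conf);
    Path attemptDir = TezCommonUtils.getAttemptRecoveryPath(recoveryRoot, 1);
    Path dagDir = TezCommonUtils.getDAGRecoveryPath(attemptDir, "dag_1_0001_1");
    System.out.println(dagDir); // <staging>/.tez/application_1_0001/.../1/dag_1_0001_1...
  }
}
// ---------------------------------------------------------------------------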
<p>
- * + * * @param fs * Filesystem * @param filePath * file path to create the file * @return FSDataOutputStream - * @throws IOException */ public static FSDataOutputStream createFileForAM(FileSystem fs, Path filePath) throws IOException { return FileSystem.create(fs, filePath, new FsPermission(TEZ_AM_FILE_PERMISSION)); } - + public static void addAdditionalLocalResources(Map additionalLrs, Map originalLRs, String logContext) { // TODO TEZ-1798. Handle contents of Tez archives for duplicate LocalResource checks @@ -388,31 +396,38 @@ public static ByteString compressByteArrayToByteString(byte[] inBytes, Deflater @Private public static byte[] decompressByteStringToByteArray(ByteString byteString) throws IOException { - return decompressByteStringToByteArray(byteString, newInflater()); + Inflater inflater = newInflater(); + try { + return decompressByteStringToByteArray(byteString, inflater); + } finally { + inflater.end(); + } } @Private public static byte[] decompressByteStringToByteArray(ByteString byteString, Inflater inflater) throws IOException { inflater.reset(); - return IOUtils.toByteArray(new InflaterInputStream(byteString.newInput(), inflater)); - + try (InflaterInputStream inflaterInputStream = new InflaterInputStream(byteString.newInput(), inflater)) { + return IOUtils.toByteArray(inflaterInputStream); + } } public static String getCredentialsInfo(Credentials credentials, String identifier) { + if (credentials == null) { + return "Credentials: #" + identifier + "Tokens=null"; + } + StringBuilder sb = new StringBuilder(); - sb.append("Credentials: #" + identifier + "Tokens=").append(credentials.numberOfTokens()); + sb.append("Credentials: #").append(identifier).append("Tokens=").append(credentials.numberOfTokens()); if (credentials.numberOfTokens() > 0) { sb.append(", Services="); - Iterator> tokenItr = credentials.getAllTokens().iterator(); - if (tokenItr.hasNext()) { - Token token = tokenItr.next(); - sb.append(token.getService()).append("(").append(token.getKind()).append(")"); + sb.append(credentials.getAllTokens().stream() + .map(t -> String.format("%s(%s)", t.getService(), t.getKind())) + .collect(Collectors.joining(","))); - } - while(tokenItr.hasNext()) { - Token token = tokenItr.next(); - sb.append(",").append(token.getService()).append("(").append(token.getKind()).append(")"); - } + sb.append(", TokenDetails="); + sb.append(credentials.getAllTokens().stream().map(Token::toString) + .collect(Collectors.joining(","))); } return sb.toString(); } @@ -421,16 +436,14 @@ public static ByteBuffer convertJobTokenToBytes( Token jobToken) throws IOException { DataOutputBuffer dob = new DataOutputBuffer(); jobToken.write(dob); - ByteBuffer bb = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); - return bb; + return ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); } public static Credentials parseCredentialsBytes(byte[] credentialsBytes) throws IOException { Credentials credentials = new Credentials(); DataInputBuffer dib = new DataInputBuffer(); try { - byte[] tokenBytes = credentialsBytes; - dib.reset(tokenBytes, tokenBytes.length); + dib.reset(credentialsBytes, credentialsBytes.length); credentials.readTokenStorageStream(dib); return credentials; } finally { @@ -445,7 +458,7 @@ public static void logCredentials(Logger log, Credentials credentials, String id } public static Collection tokenizeString(String str, String delim) { - List values = new ArrayList(); + List values = new ArrayList<>(); if (str == null || str.isEmpty()) return values; StringTokenizer tokenizer = new 
StringTokenizer(str, delim); @@ -519,7 +532,7 @@ public static long getAMClientHeartBeatTimeoutMillis(Configuration conf) { if (val > 0 && val < TezConstants.TEZ_AM_CLIENT_HEARTBEAT_TIMEOUT_SECS_MINIMUM) { return TezConstants.TEZ_AM_CLIENT_HEARTBEAT_TIMEOUT_SECS_MINIMUM * 1000; } - return val * 1000; + return val * 1000L; } /** @@ -556,7 +569,13 @@ public static long getDAGSessionTimeout(Configuration conf) { if (timeoutSecs == 0) { timeoutSecs = 1; } - return 1000l * timeoutSecs; + return 1000L * timeoutSecs; } + public static int getJavaVersion() { + String javaVersionString = System.getProperty("java.version"); + return javaVersionString.split("\\.")[0].equals("1") + ? Integer.parseInt(javaVersionString.split("\\.")[1]) // "1.8" -> 8 + : Integer.parseInt(javaVersionString.split("\\.")[0]); // "9.x" -> 9, "11.x" -> 11 + } } diff --git a/tez-api/src/main/java/org/apache/tez/common/TezUtils.java b/tez-api/src/main/java/org/apache/tez/common/TezUtils.java index dfdf9fa642..88920a4e53 100644 --- a/tez-api/src/main/java/org/apache/tez/common/TezUtils.java +++ b/tez-api/src/main/java/org/apache/tez/common/TezUtils.java @@ -20,17 +20,16 @@ import java.io.IOException; import java.io.OutputStream; -import java.util.Iterator; +import java.nio.ByteBuffer; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.zip.Deflater; -import java.util.zip.DeflaterOutputStream; -import java.util.zip.InflaterInputStream; +import java.util.Objects; -import com.google.common.base.Preconditions; import com.google.protobuf.ByteString; +import com.google.protobuf.CodedInputStream; +import org.apache.tez.runtime.api.TaskContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -41,16 +40,20 @@ import org.apache.tez.dag.api.records.DAGProtos; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; +import org.xerial.snappy.SnappyInputStream; +import org.xerial.snappy.SnappyOutputStream; /** * Utility methods for setting up a DAG. Has helpers for setting up log4j configuration, converting * {@link org.apache.hadoop.conf.Configuration} to {@link org.apache.tez.dag.api.UserPayload} etc. */ @InterfaceAudience.Public -public class TezUtils { +public final class TezUtils { private static final Logger LOG = LoggerFactory.getLogger(TezUtils.class); + private TezUtils() {} + /** * Allows changing the log level for task / AM logging.
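// ---------------------------------------------------------------------------
// Roundtrip sketch, not part of the patch: the snappy-backed conversions
// introduced below are symmetric, so a Configuration survives the
// Configuration -> UserPayload -> Configuration roundtrip. The key is
// hypothetical.
import org.apache.hadoop.conf.Configuration;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.UserPayload;

class ConfPayloadRoundtripSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration(false);
    conf.set("tez.example.key", "value");
    UserPayload payload = TezUtils.createUserPayloadFromConf(conf);
    Configuration copy = TezUtils.createConfFromUserPayload(payload);
    System.out.println(copy.get("tez.example.key")); // prints: value
  }
}
// ---------------------------------------------------------------------------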
<p>
* @@ -71,19 +74,12 @@ public static void addLog4jSystemProperties(String logLevel, * @param conf * : Configuration to be converted * @return PB ByteString (compressed) - * @throws java.io.IOException */ public static ByteString createByteStringFromConf(Configuration conf) throws IOException { - Preconditions.checkNotNull(conf, "Configuration must be specified"); + Objects.requireNonNull(conf, "Configuration must be specified"); ByteString.Output os = ByteString.newOutput(); - DeflaterOutputStream compressOs = new DeflaterOutputStream(os, - new Deflater(Deflater.BEST_SPEED)); - try { + try (SnappyOutputStream compressOs = new SnappyOutputStream(os)) { writeConfInPB(compressOs, conf); - } finally { - if (compressOs != null) { - compressOs.close(); - } } return os.toByteString(); } @@ -94,10 +90,15 @@ public static ByteString createByteStringFromConf(Configuration conf) throws IOE * * @param conf configuration to be converted * @return an instance of {@link org.apache.tez.dag.api.UserPayload} - * @throws java.io.IOException */ public static UserPayload createUserPayloadFromConf(Configuration conf) throws IOException { - return UserPayload.create(createByteStringFromConf(conf).asReadOnlyByteBuffer()); + return UserPayload.create(ByteBuffer.wrap(createByteStringFromConf(conf).toByteArray())); + } + + private static DAGProtos.ConfigurationProto createConfProto(SnappyInputStream uncompressIs) throws IOException { + CodedInputStream in = CodedInputStream.newInstance(uncompressIs); + in.setSizeLimit(Integer.MAX_VALUE); + return DAGProtos.ConfigurationProto.parseFrom(in); } /** @@ -106,17 +107,39 @@ public static UserPayload createUserPayloadFromConf(Configuration conf) throws I * @param byteString byteString representation of the conf created using {@link * #createByteStringFromConf(org.apache.hadoop.conf.Configuration)} * @return Configuration - * @throws java.io.IOException */ public static Configuration createConfFromByteString(ByteString byteString) throws IOException { - Preconditions.checkNotNull(byteString, "ByteString must be specified"); - // SnappyInputStream uncompressIs = new - // SnappyInputStream(byteString.newInput()); - InflaterInputStream uncompressIs = new InflaterInputStream(byteString.newInput()); - DAGProtos.ConfigurationProto confProto = DAGProtos.ConfigurationProto.parseFrom(uncompressIs); - Configuration conf = new Configuration(false); - readConfFromPB(confProto, conf); - return conf; + Objects.requireNonNull(byteString, "ByteString must be specified"); + try(SnappyInputStream uncompressIs = new SnappyInputStream(byteString.newInput())) { + DAGProtos.ConfigurationProto confProto = createConfProto(uncompressIs); + Configuration conf = new Configuration(false); + readConfFromPB(confProto, conf); + TezClassLoader.setupForConfiguration(conf); + return conf; + } + } + + public static Configuration createConfFromBaseConfAndPayload(TaskContext context) + throws IOException { + Configuration baseConf = context.getContainerConfiguration(); + Configuration configuration = new Configuration(baseConf); + UserPayload payload = context.getUserPayload(); + ByteString byteString = ByteString.copyFrom(payload.getPayload()); + try(SnappyInputStream uncompressIs = new SnappyInputStream(byteString.newInput())) { + DAGProtos.ConfigurationProto confProto = createConfProto(uncompressIs); + readConfFromPB(confProto, configuration); + TezClassLoader.setupForConfiguration(configuration); + return configuration; + } + } + + public static void addToConfFromByteString(Configuration configuration, 
ByteString byteString) + throws IOException { + try(SnappyInputStream uncompressIs = new SnappyInputStream(byteString.newInput())) { + DAGProtos.ConfigurationProto confProto = createConfProto(uncompressIs); + readConfFromPB(confProto, configuration); + TezClassLoader.setupForConfiguration(configuration); + } } /** @@ -126,7 +149,6 @@ public static Configuration createConfFromByteString(ByteString byteString) thro * @param payload {@link org.apache.tez.dag.api.UserPayload} created using {@link * #createUserPayloadFromConf(org.apache.hadoop.conf.Configuration)} * @return Configuration - * @throws java.io.IOException */ public static Configuration createConfFromUserPayload(UserPayload payload) throws IOException { return createConfFromByteString(ByteString.copyFrom(payload.getPayload())); @@ -134,16 +156,8 @@ public static Configuration createConfFromUserPayload(UserPayload payload) throw private static void writeConfInPB(OutputStream dos, Configuration conf) throws IOException { - DAGProtos.ConfigurationProto.Builder confProtoBuilder = DAGProtos.ConfigurationProto - .newBuilder(); - Iterator> iter = conf.iterator(); - while (iter.hasNext()) { - Map.Entry entry = iter.next(); - DAGProtos.PlanKeyValuePair.Builder kvp = DAGProtos.PlanKeyValuePair.newBuilder(); - kvp.setKey(entry.getKey()); - kvp.setValue(entry.getValue()); - confProtoBuilder.addConfKeyValues(kvp); - } + DAGProtos.ConfigurationProto.Builder confProtoBuilder = DAGProtos.ConfigurationProto.newBuilder(); + populateConfProtoFromEntries(conf, confProtoBuilder); DAGProtos.ConfigurationProto confProto = confProtoBuilder.build(); confProto.writeTo(dos); } @@ -164,10 +178,14 @@ public static String convertToHistoryText(String description, Configuration conf } if (conf != null) { JSONObject confJson = new JSONObject(); - Iterator> iter = conf.iterator(); - while (iter.hasNext()) { - Entry entry = iter.next(); - confJson.put(entry.getKey(), conf.get(entry.getKey())); + for (Entry entry : conf) { + String key = entry.getKey(); + String val = conf.get(entry.getKey()); + if (val != null) { + confJson.put(key, val); + } else { + LOG.debug("null value in Configuration after replacement for key={}. Skipping.", key); + } } jsonObject.put(ATSConstants.CONFIG, confJson); } @@ -181,4 +199,22 @@ public static String convertToHistoryText(Configuration conf) { return convertToHistoryText(null, conf); } + + /* Copy each Map.Entry with non-null value to DAGProtos.ConfigurationProto */ + public static void populateConfProtoFromEntries(Iterable> params, + DAGProtos.ConfigurationProto.Builder confBuilder) { + for(Map.Entry entry : params) { + String key = entry.getKey(); + String val = entry.getValue(); + if(val != null) { + DAGProtos.PlanKeyValuePair.Builder kvp = DAGProtos.PlanKeyValuePair.newBuilder(); + kvp.setKey(key); + kvp.setValue(val); + confBuilder.addConfKeyValues(kvp); + } else { + LOG.debug("null value for key={}. 
Skipping.", key); + } + } + } + } diff --git a/tez-api/src/main/java/org/apache/tez/common/TezYARNUtils.java b/tez-api/src/main/java/org/apache/tez/common/TezYARNUtils.java index bd6de11a59..1e5d4bdf4a 100644 --- a/tez-api/src/main/java/org/apache/tez/common/TezYARNUtils.java +++ b/tez-api/src/main/java/org/apache/tez/common/TezYARNUtils.java @@ -35,7 +35,7 @@ import org.apache.tez.dag.api.TezConstants; @Private -public class TezYARNUtils { +public final class TezYARNUtils { private static Logger LOG = LoggerFactory.getLogger(TezYARNUtils.class); public static final String ENV_NAME_REGEX = "[A-Za-z_][A-Za-z0-9_]*"; @@ -49,6 +49,8 @@ public class TezYARNUtils { + "([^,]*)" // val group ); + private TezYARNUtils() {} + public static String getFrameworkClasspath(Configuration conf, boolean usingArchive) { StringBuilder classpathBuilder = new StringBuilder(); boolean userClassesTakesPrecedence = @@ -126,9 +128,11 @@ private static void addUserSpecifiedClasspath(StringBuilder classpathBuilder, // Add PWD:PWD/* classpathBuilder.append(Environment.PWD.$()) - .append(File.pathSeparator) - .append(Environment.PWD.$() + File.separator + "*") - .append(File.pathSeparator); + .append(File.pathSeparator) + .append(Environment.PWD.$()) + .append(File.separator) + .append("*") + .append(File.pathSeparator); } public static void appendToEnvFromInputString(Map env, @@ -161,7 +165,7 @@ public static void appendToEnvFromInputString(Map env, public static void setEnvIfAbsentFromInputString(Map env, String envString) { if (envString != null && envString.length() > 0) { - String childEnvs[] = envString.split(","); + String[] childEnvs = envString.split(","); for (String cEnv : childEnvs) { String[] parts = cEnv.split("="); // split on '=' Matcher m = VAR_SUBBER .matcher(parts[1]); diff --git a/tez-api/src/main/java/org/apache/tez/common/VersionInfo.java b/tez-api/src/main/java/org/apache/tez/common/VersionInfo.java index 9f98974314..1afde0d3f4 100644 --- a/tez-api/src/main/java/org/apache/tez/common/VersionInfo.java +++ b/tez-api/src/main/java/org/apache/tez/common/VersionInfo.java @@ -42,6 +42,8 @@ public class VersionInfo { private static final String VERSION = "version"; private static final String REVISION = "revision"; private static final String BUILD_TIME = "buildtime"; + private static final String BUILD_USER = "builduser"; + private static final String BUILD_JAVA_VERSION = "buildjavaversion"; private static final String SCM_URL = "scmurl"; public static final String UNKNOWN = "Unknown"; @@ -66,13 +68,15 @@ protected VersionInfo(String component) { @VisibleForTesting @Private - protected VersionInfo(String component, String version, String revision, - String buildTime, String scmUrl) { + protected VersionInfo(String component, String version, String revision, String buildTime, + String scmUrl) { this.info = new Properties(); this.component = component; info.setProperty(VERSION, version); info.setProperty(REVISION, revision); info.setProperty(BUILD_TIME, buildTime); + info.setProperty(BUILD_USER, System.getProperty("user.name")); + info.setProperty(BUILD_JAVA_VERSION, System.getProperty("java.version")); info.setProperty(SCM_URL, scmUrl); } @@ -84,6 +88,14 @@ public String getBuildTime() { return info.getProperty(BUILD_TIME, UNKNOWN); } + public String getBuildUser() { + return info.getProperty(BUILD_USER, UNKNOWN); + } + + public String getBuildJavaVersion() { + return info.getProperty(BUILD_JAVA_VERSION, UNKNOWN); + } + public String getRevision() { return info.getProperty(REVISION, UNKNOWN); } @@ 
-99,6 +111,8 @@ public String toString() {
         + ", revision=" + getRevision()
         + ", SCM-URL=" + getSCMURL()
         + ", buildTime=" + getBuildTime()
+        + ", buildUser=" + getBuildUser()
+        + ", buildJavaVersion=" + getBuildJavaVersion()
         + " ]";
   }
 
diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounter.java b/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounter.java
index 7bc1109518..befc7c023e 100644
--- a/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounter.java
+++ b/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounter.java
@@ -49,4 +49,9 @@ public synchronized boolean equals(Object genericRight) {
   public synchronized int hashCode() {
     return Objects.hashCode(getName(), getDisplayName(), getValue());
   }
+
+  @Override
+  public String toString() {
+    return "[" + getClass().getSimpleName() + "]: " + getDisplayName() + "=" + getValue();
+  }
 }
diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounterGroup.java b/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounterGroup.java
index a4b153f6cd..1d1b56d062 100644
--- a/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounterGroup.java
+++ b/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounterGroup.java
@@ -194,10 +194,15 @@ public synchronized int hashCode() {
 
   @Override
   public void incrAllCounters(CounterGroupBase<T> rightGroup) {
+    aggrAllCounters(rightGroup);
+  }
+
+  @Override
+  public void aggrAllCounters(CounterGroupBase<T> rightGroup) {
     try {
       for (TezCounter right : rightGroup) {
         TezCounter left = findCounter(right.getName(), right.getDisplayName());
-        left.increment(right.getValue());
+        left.aggregate(right);
       }
     } catch (LimitExceededException e) {
       counters.clear();
diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounters.java b/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounters.java
index 470cb78fdd..58f73b68bd 100644
--- a/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounters.java
+++ b/tez-api/src/main/java/org/apache/tez/common/counters/AbstractCounters.java
@@ -354,13 +354,22 @@ public synchronized String toString() {
    * @param other the other Counters instance
    */
   public synchronized void incrAllCounters(AbstractCounters<C, G> other) {
+    aggrAllCounters(other);
+  }
+
+  /**
+   * Aggregates multiple counters by their amounts from another Counters
+   * instance.
+   * @param other the other Counters instance
+   */
+  public synchronized void aggrAllCounters(AbstractCounters<C, G> other) {
     for(G right : other) {
       String groupName = right.getName();
       G left = (isFrameworkGroup(groupName) ?
fgroups : groups).get(groupName); if (left == null) { left = addGroup(groupName, right.getDisplayName()); } - left.incrAllCounters(right); + left.aggrAllCounters(right); } } @@ -392,7 +401,7 @@ public void setWriteAllCounters(boolean send) { /** * Get the "writeAllCounters" option - * @return true of all counters would serialized + * @return true if all counters would be serialized */ @InterfaceAudience.Private public boolean getWriteAllCounters() { diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/AggregateFrameworkCounter.java b/tez-api/src/main/java/org/apache/tez/common/counters/AggregateFrameworkCounter.java new file mode 100644 index 0000000000..aa7d4462cf --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/common/counters/AggregateFrameworkCounter.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.common.counters; + +import org.apache.tez.common.counters.FrameworkCounterGroup.FrameworkCounter; + +@SuppressWarnings("rawtypes") +public class AggregateFrameworkCounter> extends FrameworkCounter implements AggregateTezCounter { + + private long min = Long.MAX_VALUE; + private long max = Long.MIN_VALUE; + private long count = 0; + + @SuppressWarnings("unchecked") + public AggregateFrameworkCounter(Enum ref, String groupName) { + super(ref, groupName); + } + + @Override + public void increment(long incr) { + throw new IllegalArgumentException("Cannot increment an aggregate counter directly"); + } + + @Override + public void aggregate(TezCounter other) { + final long val = other.getValue(); + final long othermax; + final long othermin; + final long othercount; + if (other instanceof AggregateTezCounter) { + othermax = ((AggregateTezCounter) other).getMax(); + othermin = ((AggregateTezCounter) other).getMin(); + othercount = ((AggregateTezCounter) other).getCount(); + } else { + othermin = othermax = val; + othercount = 1; + } + this.count += othercount; + super.increment(val); + if (this.min == Long.MAX_VALUE) { + this.min = othermin; + this.max = othermax; + return; + } + this.min = Math.min(this.min, othermin); + this.max = Math.max(this.max, othermax); + } + + @Override + public long getMin() { + return min; + } + + @Override + public long getMax() { + return max; + } + + @SuppressWarnings("unchecked") + public FrameworkCounter asFrameworkCounter() { + return ((FrameworkCounter)this); + } + + @Override + public long getCount() { + return count; + } + +} diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/AggregateTezCounter.java b/tez-api/src/main/java/org/apache/tez/common/counters/AggregateTezCounter.java new file mode 100644 index 0000000000..bf711dae86 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/common/counters/AggregateTezCounter.java @@ 
-0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.common.counters; + +public interface AggregateTezCounter { + + public abstract void aggregate(TezCounter other); + + public abstract long getMin(); + + public abstract long getMax(); + + public abstract long getCount(); + +} \ No newline at end of file diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/AggregateTezCounterDelegate.java b/tez-api/src/main/java/org/apache/tez/common/counters/AggregateTezCounterDelegate.java new file mode 100644 index 0000000000..ae2ca7b0c6 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/common/counters/AggregateTezCounterDelegate.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
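// ---------------------------------------------------------------------------
// Semantics sketch, not part of the patch: what the min/max/count bookkeeping
// of the AggregateTezCounterDelegate defined below yields when two plain
// counters are folded in. Counter names are hypothetical.
import org.apache.tez.common.counters.AggregateTezCounterDelegate;
import org.apache.tez.common.counters.GenericCounter;

class AggregateSemanticsSketch {
  public static void main(String[] args) {
    AggregateTezCounterDelegate<GenericCounter> agg =
        new AggregateTezCounterDelegate<>(new GenericCounter("C", "C", 0));
    agg.aggregate(new GenericCounter("C", "C", 3)); // e.g. from task 1
    agg.aggregate(new GenericCounter("C", "C", 7)); // e.g. from task 2
    System.out.println(agg.getValue()); // 10 (sum)
    System.out.println(agg.getMin());   // 3
    System.out.println(agg.getMax());   // 7
    System.out.println(agg.getCount()); // 2
  }
}
// ---------------------------------------------------------------------------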
+ */
+
+package org.apache.tez.common.counters;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+public class AggregateTezCounterDelegate<T extends TezCounter> extends AbstractCounter implements AggregateTezCounter {
+
+  private final T child;
+  private long min = Long.MAX_VALUE;
+  private long max = Long.MIN_VALUE;
+  private long count = 0;
+
+  public AggregateTezCounterDelegate(T child) {
+    this.child = child;
+  }
+
+  @Override
+  public String getName() {
+    return child.getName(); // this is a pass-through
+  }
+
+  @Override
+  public String getDisplayName() {
+    return child.getDisplayName();
+  }
+
+  @Override
+  public long getValue() {
+    return child.getValue();
+  }
+
+  @Override
+  public void setValue(long value) {
+    this.child.setValue(value);
+  }
+
+  @Override
+  public void increment(long incr) {
+    throw new UnsupportedOperationException("Cannot increment an aggregate counter");
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.tez.common.counters.AggregateTezCounter#aggregate(org.apache.tez.common.counters.TezCounter)
+   */
+  @Override
+  public void aggregate(TezCounter other) {
+    final long val = other.getValue();
+    final long othermax;
+    final long othermin;
+    final long othercount;
+    if (other instanceof AggregateTezCounter) {
+      othermax = ((AggregateTezCounter) other).getMax();
+      othermin = ((AggregateTezCounter) other).getMin();
+      othercount = ((AggregateTezCounter) other).getCount();
+    } else {
+      othermin = othermax = val;
+      othercount = 1;
+    }
+    this.count += othercount;
+    this.child.increment(val);
+    if (this.min == Long.MAX_VALUE) {
+      this.min = othermin;
+      this.max = othermax;
+      return;
+    }
+    this.min = Math.min(this.min, othermin);
+    this.max = Math.max(this.max, othermax);
+  }
+
+  @Override
+  public TezCounter getUnderlyingCounter() {
+    return this.child;
+  }
+
+  @Override
+  public void readFields(DataInput arg0) throws IOException {
+    throw new UnsupportedOperationException("Cannot deserialize an aggregate counter");
+  }
+
+  @Override
+  public void write(DataOutput arg0) throws IOException {
+    throw new UnsupportedOperationException("Cannot serialize an aggregate counter");
+  }
+
+  @Override
+  public long getMin() {
+    return min;
+  }
+
+  @Override
+  public long getMax() {
+    return max;
+  }
+
+  @Override
+  public long getCount() {
+    return count;
+  }
+}
diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/AggregateTezCounters.java b/tez-api/src/main/java/org/apache/tez/common/counters/AggregateTezCounters.java
new file mode 100644
index 0000000000..332c24a960
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/common/counters/AggregateTezCounters.java
@@ -0,0 +1,119 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.tez.common.counters; + +public class AggregateTezCounters extends TezCounters { + + private static final GroupFactory groupFactory = new GroupFactory(); + + public AggregateTezCounters() { + super(groupFactory); + } + + // Mix framework group implementation into CounterGroup interface + private static class AggregateFrameworkGroupImpl> + extends FrameworkCounterGroup implements CounterGroup { + + AggregateFrameworkGroupImpl(Class cls) { + super(cls); + } + + @Override + protected FrameworkCounter newCounter(T key) { + return (new AggregateFrameworkCounter(key, getName())) + .asFrameworkCounter(); + } + + @Override + public CounterGroupBase getUnderlyingGroup() { + return this; + } + } + + // Mix generic group implementation into CounterGroup interface + // and provide some mandatory group factory methods. + private static class AggregateGenericGroup extends AbstractCounterGroup + implements CounterGroup { + + AggregateGenericGroup(String name, String displayName, Limits limits) { + super(name, displayName, limits); + } + + @Override + protected TezCounter newCounter(String name, String displayName, long value) { + return new AggregateTezCounterDelegate(new GenericCounter(name, displayName, value)); + } + + @Override + protected TezCounter newCounter() { + return new AggregateTezCounterDelegate(new GenericCounter()); + } + + @Override + public CounterGroupBase getUnderlyingGroup() { + return this; + } + } + + // Mix file system group implementation into the CounterGroup interface + private static class AggregateFileSystemGroup extends FileSystemCounterGroup + implements CounterGroup { + + @Override + protected TezCounter newCounter(String scheme, FileSystemCounter key) { + return new AggregateTezCounterDelegate(new FSCounter(scheme, key)); + } + + @Override + public CounterGroupBase getUnderlyingGroup() { + return this; + } + } + + /** + * Provide factory methods for counter group factory implementation. 
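// ---------------------------------------------------------------------------
// Usage sketch, not part of the patch: folding two per-task counter snapshots
// into an AggregateTezCounters (the class above), which then exposes
// sum/min/max/count per counter. Group and counter names are hypothetical.
import org.apache.tez.common.counters.AggregateTezCounter;
import org.apache.tez.common.counters.AggregateTezCounters;
import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.common.counters.TezCounters;

class AggregateTezCountersSketch {
  public static void main(String[] args) {
    TezCounters task1 = new TezCounters();
    task1.findCounter("MyGroup", "CPU_MILLIS").increment(300);
    TezCounters task2 = new TezCounters();
    task2.findCounter("MyGroup", "CPU_MILLIS").increment(500);

    AggregateTezCounters agg = new AggregateTezCounters();
    agg.aggrAllCounters(task1);
    agg.aggrAllCounters(task2);

    TezCounter total = agg.findCounter("MyGroup", "CPU_MILLIS");
    System.out.println(total.getValue());                       // 800
    System.out.println(((AggregateTezCounter) total).getMin()); // 300
    System.out.println(((AggregateTezCounter) total).getMax()); // 500
  }
}
// ---------------------------------------------------------------------------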
+ * See also the GroupFactory in + * {@link org.apache.hadoop.TezCounters.Counters mapred.Counters} + */ + private static class GroupFactory + extends CounterGroupFactory { + + @Override + protected > + FrameworkGroupFactory + newFrameworkGroupFactory(final Class cls) { + return new FrameworkGroupFactory() { + @Override public CounterGroup newGroup(String name) { + return new AggregateFrameworkGroupImpl(cls); // impl in this package + } + }; + } + + @Override + protected CounterGroup newGenericGroup(String name, String displayName, + Limits limits) { + return new AggregateGenericGroup(name, displayName, limits); + } + + @Override + protected CounterGroup newFileSystemGroup() { + return new AggregateFileSystemGroup(); + } + } +} diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/CounterGroupBase.java b/tez-api/src/main/java/org/apache/tez/common/counters/CounterGroupBase.java index be4bf77e83..ebcc7b0e21 100644 --- a/tez-api/src/main/java/org/apache/tez/common/counters/CounterGroupBase.java +++ b/tez-api/src/main/java/org/apache/tez/common/counters/CounterGroupBase.java @@ -76,7 +76,7 @@ public interface CounterGroupBase /** * Find a counter in the group * @param counterName the name of the counter - * @param create create the counter if not found if true + * @param create create the counter if not found is true * @return the counter that was found or added or null if create is false */ T findCounter(String counterName, boolean create); @@ -97,8 +97,17 @@ public interface CounterGroupBase * Increment all counters by a group of counters * @param rightGroup the group to be added to this group */ + @Deprecated void incrAllCounters(CounterGroupBase rightGroup); - + + /** + * Aggregate all counters by a group of counters + * @param rightGroup the group to be added to this group + */ + public default void aggrAllCounters(CounterGroupBase rightGroup) { + incrAllCounters(rightGroup); + } + @Private /** * Exposes the underlying group type if a facade. diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/DAGCounter.java b/tez-api/src/main/java/org/apache/tez/common/counters/DAGCounter.java index 5064c35c5a..ca575d4dfa 100644 --- a/tez-api/src/main/java/org/apache/tez/common/counters/DAGCounter.java +++ b/tez-api/src/main/java/org/apache/tez/common/counters/DAGCounter.java @@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceStability; // Per-job counters +// Keep in sync with tez-ui/src/main/webapp/config/default-app-conf.js @InterfaceAudience.Public @InterfaceStability.Evolving public enum DAGCounter { @@ -29,6 +30,28 @@ public enum DAGCounter { NUM_KILLED_TASKS, NUM_SUCCEEDED_TASKS, TOTAL_LAUNCHED_TASKS, + + /* The durations of task attempts are categorized based on their final states. The duration of successful tasks + can serve as a reference when analyzing the durations of failed or killed tasks. This is because solely examining + failed or killed task durations may be misleading, as these durations are measured from the submission time, + which does not always correspond to the actual start time of the task attempt on executor nodes + (e.g., in scenarios involving Hive LLAP). + These counters align with the duration metrics used for WALL_CLOCK_MILLIS. + As such, the following relationship applies: + WALL_CLOCK_MILLIS = DURATION_FAILED_TASKS_MILLIS + DURATION_KILLED_TASKS_MILLIS + DURATION_SUCCEEDED_TASKS_MILLIS + */ + + // Total amount of time spent on running FAILED task attempts. 
This can be blamed for performance degradation, as a + // DAG can still finish successfully in the presence of failed attempts. + DURATION_FAILED_TASKS_MILLIS, + + // Total amount of time spent on running KILLED task attempts. + DURATION_KILLED_TASKS_MILLIS, + + // Total amount of time spent on running SUCCEEDED task attempts, which can be a reference together with the same for + // FAILED and KILLED attempts. + DURATION_SUCCEEDED_TASKS_MILLIS, + OTHER_LOCAL_TASKS, DATA_LOCAL_TASKS, RACK_LOCAL_TASKS, @@ -38,5 +61,66 @@ public enum DAGCounter { NUM_UBER_SUBTASKS, NUM_FAILED_UBERTASKS, AM_CPU_MILLISECONDS, - AM_GC_TIME_MILLIS + /** Wall clock time taken by all the tasks. */ + WALL_CLOCK_MILLIS, + AM_GC_TIME_MILLIS, + + /* + * Type: # of containers + * Both allocated and launched containers before DAG start. + * This is incremented only once when the DAG starts and it's calculated + * by querying all the held containers from TaskSchedulers. + */ + INITIAL_HELD_CONTAINERS, + + /* + * Type: # of containers + * All containers that have been seen/used in this DAG by task allocation. + * This counter can be calculated at the end of DAG by simply counting the distinct + * ContainerIds that have been seen in TaskSchedulerManager.taskAllocated callbacks. + */ + TOTAL_CONTAINERS_USED, + + /* + * Type: # of events + * Number of container allocations during a DAG. This is incremented every time + * the containerAllocated callback is called in the TaskSchedulerContext. + * This counter doesn't account for initially held (launched, allocated) containers. + */ + TOTAL_CONTAINER_ALLOCATION_COUNT, + + /* + * Type: # of events + * Number of container launches during a DAG. This is incremented every time + * the containerLaunched callback is called in the ContainerLauncherContext. + * This counter doesn't account for initially held (launched, allocated) containers. + */ + TOTAL_CONTAINER_LAUNCH_COUNT, + + /* + * Type: # of events + * Number of container releases during a DAG. This is incremented every time + * the containerBeingReleased callback is called in the TaskSchedulerContext. + */ + TOTAL_CONTAINER_RELEASE_COUNT, + + /* + * Type: # of events + * Number of container reuses during a DAG. This is incremented every time + * the containerReused callback is called in the TaskSchedulerContext. + */ + TOTAL_CONTAINER_REUSE_COUNT, + + /* + * Number of nodes to which task attempts were assigned in this DAG. + * Nodes are distinguished by the Yarn NodeId.getHost(). + */ + NODE_USED_COUNT, + + /* + * Total number of nodes visible to the task scheduler (regardless of + * task assignments). This is typically exposed by a resource manager + * client. + */ + NODE_TOTAL_COUNT } diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/FileSystemCounter.java b/tez-api/src/main/java/org/apache/tez/common/counters/FileSystemCounter.java index 73e358179b..fdb93f1194 100644 --- a/tez-api/src/main/java/org/apache/tez/common/counters/FileSystemCounter.java +++ b/tez-api/src/main/java/org/apache/tez/common/counters/FileSystemCounter.java @@ -19,16 +19,68 @@ package org.apache.tez.common.counters; import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.fs.StorageStatistics.CommonStatisticNames; +/** + * FileSystemCounter is an enum for defining which filesystem/storage statistics are exposed in Tez. 
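// ---------------------------------------------------------------------------
// Lookup sketch, not part of the patch: each FileSystemCounter constant now
// carries the hadoop StorageStatistics name it is fed from; the group code
// handles the scheme-qualified expansion (e.g. "HDFS_BYTES_READ") generically.
import org.apache.tez.common.counters.FileSystemCounter;

class FileSystemCounterSketch {
  public static void main(String[] args) {
    System.out.println(FileSystemCounter.BYTES_READ.getOpName()); // bytesRead
    System.out.println(FileSystemCounter.OP_RENAME.getOpName());  // op_rename
  }
}
// ---------------------------------------------------------------------------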
+ */ @Private public enum FileSystemCounter { - BYTES_READ, - BYTES_WRITTEN, - READ_OPS, - LARGE_READ_OPS, - WRITE_OPS, - HDFS_BYTES_READ, - HDFS_BYTES_WRITTEN, - FILE_BYTES_READ, - FILE_BYTES_WRITTEN + BYTES_READ("bytesRead"), + BYTES_WRITTEN("bytesWritten"), + READ_OPS("readOps"), + LARGE_READ_OPS("largeReadOps"), + WRITE_OPS("writeOps"), + + // Additional counters from HADOOP-13305 + OP_APPEND(CommonStatisticNames.OP_APPEND), + OP_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE), + OP_CREATE(CommonStatisticNames.OP_CREATE), + OP_CREATE_NON_RECURSIVE(CommonStatisticNames.OP_CREATE_NON_RECURSIVE), + OP_DELETE(CommonStatisticNames.OP_DELETE), + OP_EXISTS(CommonStatisticNames.OP_EXISTS), + OP_GET_CONTENT_SUMMARY(CommonStatisticNames.OP_GET_CONTENT_SUMMARY), + OP_GET_DELEGATION_TOKEN(CommonStatisticNames.OP_GET_DELEGATION_TOKEN), + OP_GET_FILE_CHECKSUM(CommonStatisticNames.OP_GET_FILE_CHECKSUM), + OP_GET_FILE_STATUS(CommonStatisticNames.OP_GET_FILE_STATUS), + OP_GET_STATUS(CommonStatisticNames.OP_GET_STATUS), + OP_GLOB_STATUS(CommonStatisticNames.OP_GLOB_STATUS), + OP_IS_FILE(CommonStatisticNames.OP_IS_FILE), + OP_IS_DIRECTORY(CommonStatisticNames.OP_IS_DIRECTORY), + OP_LIST_FILES(CommonStatisticNames.OP_LIST_FILES), + OP_LIST_LOCATED_STATUS(CommonStatisticNames.OP_LIST_LOCATED_STATUS), + OP_LIST_STATUS(CommonStatisticNames.OP_LIST_STATUS), + OP_MKDIRS(CommonStatisticNames.OP_MKDIRS), + OP_MODIFY_ACL_ENTRIES(CommonStatisticNames.OP_MODIFY_ACL_ENTRIES), + OP_OPEN(CommonStatisticNames.OP_OPEN), + OP_REMOVE_ACL(CommonStatisticNames.OP_REMOVE_ACL), + OP_REMOVE_ACL_ENTRIES(CommonStatisticNames.OP_REMOVE_ACL_ENTRIES), + OP_REMOVE_DEFAULT_ACL(CommonStatisticNames.OP_REMOVE_DEFAULT_ACL), + OP_RENAME(CommonStatisticNames.OP_RENAME), + OP_SET_ACL(CommonStatisticNames.OP_SET_ACL), + OP_SET_OWNER(CommonStatisticNames.OP_SET_OWNER), + OP_SET_PERMISSION(CommonStatisticNames.OP_SET_PERMISSION), + OP_SET_TIMES(CommonStatisticNames.OP_SET_TIMES), + OP_TRUNCATE(CommonStatisticNames.OP_TRUNCATE), + + // counters below are not needed in production, as the scheme_countername expansion is taken care of by the + // FileSystemCounterGroup, the only reason they are here is that some analyzers still depend on them + @Deprecated + HDFS_BYTES_READ("hdfsBytesRead"), + @Deprecated + HDFS_BYTES_WRITTEN("hdfsBytesWritten"), + @Deprecated + FILE_BYTES_READ("fileBytesRead"), + @Deprecated + FILE_BYTES_WRITTEN("fileBytesWritten"); + + private final String opName; + + FileSystemCounter(String opName) { + this.opName = opName; + } + + public String getOpName() { + return opName; + } } diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/FileSystemCounterGroup.java b/tez-api/src/main/java/org/apache/tez/common/counters/FileSystemCounterGroup.java index 502415484a..76e5235d27 100644 --- a/tez-api/src/main/java/org/apache/tez/common/counters/FileSystemCounterGroup.java +++ b/tez-api/src/main/java/org/apache/tez/common/counters/FileSystemCounterGroup.java @@ -27,11 +27,10 @@ import java.util.Iterator; import java.util.Locale; import java.util.Map; +import java.util.Objects; import com.google.common.base.Joiner; -import static com.google.common.base.Preconditions.*; - import com.google.common.collect.AbstractIterator; import com.google.common.collect.Iterators; import com.google.common.collect.Maps; @@ -225,12 +224,17 @@ public int size() { } @Override - public void incrAllCounters(CounterGroupBase other) { - if (checkNotNull(other.getUnderlyingGroup(), "other group") + public void 
incrAllCounters(CounterGroupBase rightGroup) { + aggrAllCounters(rightGroup); + } + + @Override + public void aggrAllCounters(CounterGroupBase other) { + if (Objects.requireNonNull(other.getUnderlyingGroup(), "other group") instanceof FileSystemCounterGroup) { for (TezCounter counter : other) { FSCounter c = (FSCounter) ((TezCounter)counter).getUnderlyingCounter(); - findCounter(c.scheme, c.key) .increment(counter.getValue()); + findCounter(c.scheme, c.key) .aggregate(counter); } } } diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/FrameworkCounterGroup.java b/tez-api/src/main/java/org/apache/tez/common/counters/FrameworkCounterGroup.java index 3a4aa9754e..cb27741283 100644 --- a/tez-api/src/main/java/org/apache/tez/common/counters/FrameworkCounterGroup.java +++ b/tez-api/src/main/java/org/apache/tez/common/counters/FrameworkCounterGroup.java @@ -18,13 +18,14 @@ package org.apache.tez.common.counters; -import static com.google.common.base.Preconditions.checkNotNull; + import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.Arrays; import java.util.Iterator; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.io.WritableUtils; @@ -190,14 +191,20 @@ public int size() { return n; } + @Override + @SuppressWarnings("deprecation") + public void incrAllCounters(CounterGroupBase rightGroup) { + aggrAllCounters(rightGroup); + } + @SuppressWarnings("rawtypes") @Override - public void incrAllCounters(CounterGroupBase other) { - if (checkNotNull(other, "other counter group") + public void aggrAllCounters(CounterGroupBase other) { + if (Objects.requireNonNull(other, "other counter group") instanceof FrameworkCounterGroup) { for (TezCounter counter : other) { findCounter(((FrameworkCounter) counter).key.name()) - .increment(counter.getValue()); + .aggregate(counter); } } } diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/Limits.java b/tez-api/src/main/java/org/apache/tez/common/counters/Limits.java index 332df8e1e0..3e926c693b 100644 --- a/tez-api/src/main/java/org/apache/tez/common/counters/Limits.java +++ b/tez-api/src/main/java/org/apache/tez/common/counters/Limits.java @@ -30,33 +30,34 @@ public class Limits { private static final Logger LOG = LoggerFactory.getLogger(Limits.class); + private static final Configuration DEFAULT_CONFIGURATION = new TezConfiguration(); private static Configuration conf = null; private static int GROUP_NAME_MAX; private static int COUNTER_NAME_MAX; private static int GROUPS_MAX; private static int COUNTERS_MAX; - private static boolean initialized = false; - private static synchronized void ensureInitialized() { - if (initialized) { - return; - } - if (conf == null) { - conf = new TezConfiguration(); + static { + init(DEFAULT_CONFIGURATION); + } + + public synchronized static void setConfiguration(Configuration conf) { + // see change to reset() + if (Limits.conf == DEFAULT_CONFIGURATION && conf != null) { + init(conf); } - GROUP_NAME_MAX = - conf.getInt(TezConfiguration.TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH, - TezConfiguration.TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH_DEFAULT); - COUNTER_NAME_MAX = - conf.getInt(TezConfiguration.TEZ_COUNTERS_COUNTER_NAME_MAX_LENGTH, - TezConfiguration.TEZ_COUNTERS_COUNTER_NAME_MAX_LENGTH_DEFAULT); - GROUPS_MAX = - conf.getInt(TezConfiguration.TEZ_COUNTERS_MAX_GROUPS, - TezConfiguration.TEZ_COUNTERS_MAX_GROUPS_DEFAULT); + } + + private static void init(Configuration conf) { + Limits.conf = conf; + GROUP_NAME_MAX = 
conf.getInt(TezConfiguration.TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH, + TezConfiguration.TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH_DEFAULT); + COUNTER_NAME_MAX = conf.getInt(TezConfiguration.TEZ_COUNTERS_COUNTER_NAME_MAX_LENGTH, + TezConfiguration.TEZ_COUNTERS_COUNTER_NAME_MAX_LENGTH_DEFAULT); + GROUPS_MAX = conf.getInt(TezConfiguration.TEZ_COUNTERS_MAX_GROUPS, + TezConfiguration.TEZ_COUNTERS_MAX_GROUPS_DEFAULT); COUNTERS_MAX = - conf.getInt(TezConfiguration.TEZ_COUNTERS_MAX, - TezConfiguration.TEZ_COUNTERS_MAX_DEFAULT); - initialized = true; + conf.getInt(TezConfiguration.TEZ_COUNTERS_MAX, TezConfiguration.TEZ_COUNTERS_MAX_DEFAULT); LOG.info("Counter limits initialized with parameters: " + " GROUP_NAME_MAX=" + GROUP_NAME_MAX + ", MAX_GROUPS=" + GROUPS_MAX + ", COUNTER_NAME_MAX=" + COUNTER_NAME_MAX + ", MAX_COUNTERS=" + COUNTERS_MAX); @@ -70,17 +71,14 @@ public static String filterName(String name, int maxLen) { } public static String filterCounterName(String name) { - ensureInitialized(); return filterName(name, COUNTER_NAME_MAX); } public static String filterGroupName(String name) { - ensureInitialized(); return filterName(name, GROUP_NAME_MAX); } public synchronized void checkCounters(int size) { - ensureInitialized(); if (firstViolation != null) { throw new LimitExceededException(firstViolation); } @@ -97,7 +95,6 @@ public synchronized void incrCounters() { } public synchronized void checkGroups(int size) { - ensureInitialized(); if (firstViolation != null) { throw new LimitExceededException(firstViolation); } @@ -107,21 +104,10 @@ public synchronized void checkGroups(int size) { } } - public synchronized LimitExceededException violation() { - return firstViolation; - } - - public synchronized static void setConfiguration(Configuration conf) { - if (Limits.conf == null && conf != null) { - Limits.conf = conf; - } - } - @VisibleForTesting @InterfaceAudience.Private public synchronized static void reset() { - conf = null; - initialized = false; + conf = DEFAULT_CONFIGURATION; } } diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/TaskCounter.java b/tez-api/src/main/java/org/apache/tez/common/counters/TaskCounter.java index 2f18bc677b..19d5bbdffa 100644 --- a/tez-api/src/main/java/org/apache/tez/common/counters/TaskCounter.java +++ b/tez-api/src/main/java/org/apache/tez/common/counters/TaskCounter.java @@ -21,10 +21,11 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; // Counters used by Task classes +// Keep in sync with tez-ui/src/main/webapp/config/default-app-conf.js @Private public enum TaskCounter { // TODO Eventually, rename counters to be non-MR specific and map them to MR equivalent. - + NUM_SPECULATIONS, /** @@ -38,29 +39,29 @@ public enum TaskCounter { * Alternately number of records seen by a ReduceProcessor */ REDUCE_INPUT_RECORDS, - + REDUCE_OUTPUT_RECORDS, // Not used at the moment. REDUCE_SKIPPED_GROUPS, // Not used at the moment. REDUCE_SKIPPED_RECORDS, // Not used at the moment. SPLIT_RAW_BYTES, - + COMBINE_INPUT_RECORDS, COMBINE_OUTPUT_RECORDS, // Not used at the moment. /** * Number of records written to disk in case of OnFileSortedOutput. - * - * Number of additional records writtent out to disk in case of + * + * Number of additional records written out to disk in case of * ShuffledMergedInput; this represents the number of unnecessary spills to * disk caused by lac of memory. */ SPILLED_RECORDS, /** - * Number of Inputs from which data is copied. Represents physical Inputs. + * Number of Inputs from which data is copied. 
Represents physical Inputs. */ NUM_SHUFFLED_INPUTS, - + /** * Number of Inputs from which data was not copied - typically due to an empty Input */ @@ -70,10 +71,12 @@ public enum TaskCounter { * Number of failed copy attempts (physical inputs) */ NUM_FAILED_SHUFFLE_INPUTS, - + MERGED_MAP_OUTPUTS, GC_TIME_MILLIS, CPU_MILLISECONDS, + /** Wall clock time taken by the task initialization and execution. */ + WALL_CLOCK_MILLISECONDS, PHYSICAL_MEMORY_BYTES, VIRTUAL_MEMORY_BYTES, COMMITTED_HEAP_BYTES, @@ -81,7 +84,7 @@ public enum TaskCounter { /** * Represents the number of Input Records that were actually processed. * Used by MRInput and ShuffledUnorderedKVInput - * + * */ INPUT_RECORDS_PROCESSED, @@ -90,19 +93,26 @@ public enum TaskCounter { */ INPUT_SPLIT_LENGTH_BYTES, - // + // /** * Represents the number of actual output records. * Used by MROutput, OnFileSortedOutput, and OnFileUnorderedKVOutput */ OUTPUT_RECORDS, - + + /** + * Approximate number of input records that should be processed as the event keeps arriving from + * inputs. + * //TODO: As of now supporting broadcast data only. + */ + APPROXIMATE_INPUT_RECORDS, + /** * Represent the number of large records in the output - typically, records which are * spilled directly */ OUTPUT_LARGE_RECORDS, - + SKIPPED_RECORDS, // Not used at the moment. /** @@ -122,19 +132,19 @@ public enum TaskCounter { * size + overhead) */ OUTPUT_BYTES_PHYSICAL, - + /** * Bytes written to disk due to unnecessary spills (lac of adequate memory). * Used by OnFileSortedOutput and ShuffledMergedInput */ ADDITIONAL_SPILLS_BYTES_WRITTEN, - + /** * Bytes read from disk due to previous spills (lac of adequate memory). * Used by OnFileSortedOutput and ShuffledMergedInput */ ADDITIONAL_SPILLS_BYTES_READ, - + /** * Spills that were generated & read by the same task (unnecessary spills due to lac of * adequate memory). @@ -149,7 +159,7 @@ public enum TaskCounter { * as final merge is avoided. */ SHUFFLE_CHUNK_COUNT, - + INPUT_GROUPS, // Not used at the moment. Will eventually replace REDUCE_INPUT_GROUPS /** @@ -162,15 +172,15 @@ public enum TaskCounter { * Uncompressed size of the data being processed by the relevant Shuffle. * Includes serialization, file format etc overheads. */ - SHUFFLE_BYTES_DECOMPRESSED, + SHUFFLE_BYTES_DECOMPRESSED, /** - * Number of bytes which were shuffled directly to memory. + * Number of bytes which were shuffled directly to memory. */ SHUFFLE_BYTES_TO_MEM, /** - * Number of bytes which were shuffled directly to disk + * Number of bytes which were shuffled directly to disk */ SHUFFLE_BYTES_TO_DISK, @@ -218,5 +228,13 @@ public enum TaskCounter { * * Represented in milliseconds */ - LAST_EVENT_RECEIVED + LAST_EVENT_RECEIVED, + + + /** + * The size of the data that is transmitted via event. 
+   *
+   * Represented in number of bytes
+   */
+  DATA_BYTES_VIA_EVENT
 }
\ No newline at end of file
diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/TezCounter.java b/tez-api/src/main/java/org/apache/tez/common/counters/TezCounter.java
index 2b40ed2dbc..9abbb82167 100644
--- a/tez-api/src/main/java/org/apache/tez/common/counters/TezCounter.java
+++ b/tez-api/src/main/java/org/apache/tez/common/counters/TezCounter.java
@@ -73,10 +73,18 @@ public interface TezCounter extends Writable {
    * @param incr the value to increase this counter by
    */
   void increment(long incr);
-
+
+  /**
+   * Aggregate this counter with another counter
+   * @param other TezCounter to aggregate with; by default this is increment(other.getValue())
+   */
+  public default void aggregate(TezCounter other) {
+    increment(other.getValue());
+  }
+
   /**
    * Return the underlying object if this is a facade.
-   * @return the undelying object.
+   * @return the underlying object.
    */
   @Private
   TezCounter getUnderlyingCounter();
diff --git a/tez-api/src/main/java/org/apache/tez/common/counters/TezCounters.java b/tez-api/src/main/java/org/apache/tez/common/counters/TezCounters.java
index ca03f415ba..a1205b9fdd 100644
--- a/tez-api/src/main/java/org/apache/tez/common/counters/TezCounters.java
+++ b/tez-api/src/main/java/org/apache/tez/common/counters/TezCounters.java
@@ -128,7 +128,17 @@ protected CounterGroup newFileSystemGroup() {
    * Default constructor
    */
   public TezCounters() {
-    super(groupFactory);
+    this(groupFactory);
+  }
+
+  /**
+   * Construct the Counters object using a custom counter group factory
+   * @param <C> the type of counter
+   * @param <G> the type of counter group
+   */
+  public <C extends TezCounter, G extends CounterGroupBase<C>> TezCounters(
+      CounterGroupFactory<C, G> customGroupFactory) {
+    super(customGroupFactory);
   }
 
   /**
diff --git a/tez-api/src/main/java/org/apache/tez/common/security/HistoryACLPolicyManager.java b/tez-api/src/main/java/org/apache/tez/common/security/HistoryACLPolicyManager.java
index 92eea67440..cb4efc6fb6 100644
--- a/tez-api/src/main/java/org/apache/tez/common/security/HistoryACLPolicyManager.java
+++ b/tez-api/src/main/java/org/apache/tez/common/security/HistoryACLPolicyManager.java
@@ -30,7 +30,7 @@
 
 /**
  * ACL Policy Manager
- * An instance of this implements any ACL related activity when starting a session or submitting a 
+ * An instance of this implements any ACL related activity when starting a session or submitting a
  * DAG. It is used in the HistoryLoggingService to create domain ids and populate entities with
  * domain id.
  */
@@ -41,7 +41,7 @@ public interface HistoryACLPolicyManager extends Configurable {
   /**
    * Take any necessary steps for setting up both Session ACLs and non session acls. This is called
    * with the am configuration which contains the ACL information to be used to create a domain.
-   * If the method returns a value, then its assumed to be a valid domain and used as domainId.
+   * If the method returns a value, then it's assumed to be a valid domain and used as domainId.
    * If the method returns null, acls are disabled at session level, i.e use default acls at session
    * level.
    * If the method throws an Exception, history logging is disabled for the entire session.
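The aggregate()/aggrAllCounters() defaults introduced above preserve the old summing behavior while giving counter implementations a hook to combine values differently. A minimal illustrative sketch of such an override (MaxValueCounter is hypothetical and not part of this patch; GenericCounter is the existing Tez implementation class):

  import org.apache.tez.common.counters.GenericCounter;
  import org.apache.tez.common.counters.TezCounter;

  // Hypothetical counter that keeps the maximum observed value instead of a sum.
  public class MaxValueCounter extends GenericCounter {
    public MaxValueCounter(String name, String displayName) {
      super(name, displayName);
    }

    @Override
    public void aggregate(TezCounter other) {
      // The default aggregate() is increment(other.getValue()); overriding it
      // is exactly the hook that the new aggrAllCounters() call path exercises.
      setValue(Math.max(getValue(), other.getValue()));
    }
  }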
diff --git a/tez-api/src/main/java/org/apache/tez/common/security/Master.java b/tez-api/src/main/java/org/apache/tez/common/security/Master.java index de73d10092..d0b8d16c47 100644 --- a/tez-api/src/main/java/org/apache/tez/common/security/Master.java +++ b/tez-api/src/main/java/org/apache/tez/common/security/Master.java @@ -29,12 +29,14 @@ @Private @Unstable -public class Master { +public final class Master { public enum State { - INITIALIZING, RUNNING; + INITIALIZING, RUNNING } + private Master() {} + public static String getMasterUserName(Configuration conf) { return conf.get(YarnConfiguration.RM_PRINCIPAL); } diff --git a/tez-api/src/main/java/org/apache/tez/common/security/TokenCache.java b/tez-api/src/main/java/org/apache/tez/common/security/TokenCache.java index fc2c07dc1c..21b1026d39 100644 --- a/tez-api/src/main/java/org/apache/tez/common/security/TokenCache.java +++ b/tez-api/src/main/java/org/apache/tez/common/security/TokenCache.java @@ -34,6 +34,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.tez.dag.api.TezConfiguration; /** @@ -44,14 +45,16 @@ */ @InterfaceAudience.Private @InterfaceStability.Unstable -public class TokenCache { +public final class TokenCache { private static final Logger LOG = LoggerFactory.getLogger(TokenCache.class); + private TokenCache() {} + /** * auxiliary method to get user's secret keys.. - * @param alias + * * @return secret key from the storage */ public static byte[] getSecretKey(Credentials credentials, Text alias) { @@ -63,10 +66,9 @@ public static byte[] getSecretKey(Credentials credentials, Text alias) { /** * Convenience method to obtain delegation tokens from namenodes * corresponding to the paths passed. - * @param credentials + * @param credentials credentials * @param ps array of paths * @param conf configuration - * @throws IOException */ public static void obtainTokensForFileSystems(Credentials credentials, Path[] ps, Configuration conf) throws IOException { @@ -79,7 +81,7 @@ public static void obtainTokensForFileSystems(Credentials credentials, private static final int MAX_FS_OBJECTS = 10; static void obtainTokensForFileSystemsInternal(Credentials credentials, Path[] ps, Configuration conf) throws IOException { - Set fsSet = new HashSet(); + Set fsSet = new HashSet<>(); boolean limitExceeded = false; for(Path p: ps) { FileSystem fs = p.getFileSystem(conf); @@ -101,24 +103,37 @@ static void obtainTokensForFileSystemsInternal(Credentials credentials, } } + static boolean isTokenRenewalExcluded(FileSystem fs, Configuration conf) { + String[] nns = + conf.getStrings(TezConfiguration.TEZ_JOB_FS_SERVERS_TOKEN_RENEWAL_EXCLUDE); + if (nns != null) { + String host = fs.getUri().getHost(); + for (String nn : nns) { + if (nn.equals(host)) { + return true; + } + } + } + return false; + } + /** * get delegation token for a specific FS - * @param fs - * @param credentials - * @param p - * @param conf - * @throws IOException */ static void obtainTokensForFileSystemsInternal(FileSystem fs, Credentials credentials, Configuration conf) throws IOException { // TODO Change this to use YARN utilities once YARN-1664 is fixed. 
- String delegTokenRenewer = Master.getMasterPrincipal(conf); - if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) { - throw new IOException( - "Can't get Master Kerberos principal for use as renewer"); + // RM skips renewing token with empty renewer + String delegTokenRenewer = ""; + if (!isTokenRenewalExcluded(fs, conf)) { + delegTokenRenewer = Master.getMasterPrincipal(conf); + if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) { + throw new IOException( + "Can't get Master Kerberos principal for use as renewer"); + } } - final Token tokens[] = fs.addDelegationTokens(delegTokenRenewer, + final Token[] tokens = fs.addDelegationTokens(delegTokenRenewer, credentials); if (tokens != null) { for (Token token : tokens) { @@ -131,7 +146,6 @@ static void obtainTokensForFileSystemsInternal(FileSystem fs, /** * store session specific token - * @param t */ @InterfaceAudience.Private public static void setSessionToken(Token t, diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java b/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java index cdfa3b2488..0864b82e80 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/DAG.java @@ -33,6 +33,7 @@ import java.util.Map.Entry; import java.util.Set; import java.util.Stack; +import java.util.Objects; import org.apache.commons.collections4.BidiMap; import org.apache.commons.collections4.bidimap.DualLinkedHashBidiMap; @@ -41,6 +42,7 @@ import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.tez.client.CallerContext; import org.apache.tez.common.JavaOptsChecker; +import org.apache.tez.common.TezUtils; import org.apache.tez.dag.api.Vertex.VertexExecutionContext; import org.apache.tez.dag.api.records.DAGProtos; import org.apache.tez.serviceplugins.api.ServicePluginsDescriptor; @@ -72,22 +74,22 @@ import org.apache.tez.dag.api.records.DAGProtos.VertexPlan; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; /** - * Top level entity that defines the DAG (Directed Acyclic Graph) representing - * the data flow graph. Consists of a set of Vertices and Edges connecting the - * vertices. Vertices represent transformations of data and edges represent + * Top level entity that defines the DAG (Directed Acyclic Graph) representing + * the data flow graph. Consists of a set of Vertices and Edges connecting the + * vertices. Vertices represent transformations of data and edges represent * movement of data between vertices. 
*/ @Public public class DAG { - + private static final Logger LOG = LoggerFactory.getLogger(DAG.class); - + final BidiMap vertices = new DualLinkedHashBidiMap(); final Set edges = Sets.newHashSet(); @@ -127,11 +129,11 @@ public static DAG create(String name) { * @return {@link DAG} */ public synchronized DAG addTaskLocalFiles(Map localFiles) { - Preconditions.checkNotNull(localFiles); + Objects.requireNonNull(localFiles); TezCommonUtils.addAdditionalLocalResources(localFiles, commonTaskLocalFiles, "DAG " + getName()); return this; } - + public synchronized DAG addVertex(Vertex vertex) { if (vertices.containsKey(vertex.getName())) { throw new IllegalStateException( @@ -144,18 +146,18 @@ public synchronized DAG addVertex(Vertex vertex) { public synchronized Vertex getVertex(String vertexName) { return vertices.get(vertexName); } - + /** * One of the methods that can be used to provide information about required * Credentials when running on a secure cluster. A combination of this and * addURIsForCredentials should be used to specify information about all * credentials required by a DAG. AM specific credentials are not used when * executing a DAG. - * + * * Set credentials which will be required to run this dag. This method can be * used if the client has already obtained some or all of the required * credentials. - * + * * @param credentials Credentials for the DAG * @return {@link DAG} */ @@ -177,7 +179,7 @@ public synchronized DAG setCredentials(Credentials credentials) { */ @Deprecated public synchronized DAG setDAGInfo(String dagInfo) { - Preconditions.checkNotNull(dagInfo); + Objects.requireNonNull(dagInfo); this.dagInfo = dagInfo; return this; } @@ -189,13 +191,13 @@ public synchronized DAG setDAGInfo(String dagInfo) { * @return {@link DAG} */ public synchronized DAG setCallerContext(CallerContext callerContext) { - Preconditions.checkNotNull(callerContext); + Objects.requireNonNull(callerContext); this.callerContext = callerContext; return this; } /** - * Create a group of vertices that share a common output. This can be used to implement + * Create a group of vertices that share a common output. This can be used to implement * unions efficiently. * @param name Name of the group. * @param members {@link Vertex} members of the group @@ -242,27 +244,27 @@ public synchronized DAGAccessControls getDagAccessControls() { * setCredentials should be used to specify information about all credentials * required by a DAG. AM specific credentials are not used when executing a * DAG. - * + * * This method can be used to specify a list of URIs for which Credentials * need to be obtained so that the job can run. An incremental list of URIs * can be provided by making multiple calls to the method. - * + * * Currently, @{link credentials} can only be fetched for HDFS and other * {@link org.apache.hadoop.fs.FileSystem} implementations that support * credentials. - * + * * @param uris * a list of {@link URI}s * @return {@link DAG} */ public synchronized DAG addURIsForCredentials(Collection uris) { - Preconditions.checkNotNull(uris, "URIs cannot be null"); + Objects.requireNonNull(uris, "URIs cannot be null"); urisForCredentials.addAll(uris); return this; } /** - * + * * @return an unmodifiable list representing the URIs for which credentials * are required. 
*/ @@ -270,7 +272,7 @@ public synchronized DAG addURIsForCredentials(Collection uris) { public synchronized Collection getURIsForCredentials() { return Collections.unmodifiableCollection(urisForCredentials); } - + @Private public synchronized Set getVertices() { return Collections.unmodifiableSet(this.vertices.values()); @@ -303,7 +305,7 @@ public synchronized DAG addEdge(Edge edge) { edges.add(edge); return this; } - + /** * Add a {@link GroupInputEdge} to the DAG. * @param edge {@link GroupInputEdge} @@ -327,7 +329,7 @@ public synchronized DAG addEdge(GroupInputEdge edge) { VertexGroup av = edge.getInputVertexGroup(); av.addOutputVertex(edge.getOutputVertex(), edge); groupInputEdges.add(edge); - + // add new edge between members of VertexGroup and destVertex of the GroupInputEdge List newEdges = Lists.newLinkedList(); Vertex dstVertex = edge.getOutputVertex(); @@ -336,14 +338,14 @@ public synchronized DAG addEdge(GroupInputEdge edge) { newEdges.add(Edge.create(member, dstVertex, edge.getEdgeProperty())); } dstVertex.addGroupInput(uv.getGroupName(), uv.getGroupInfo()); - + for (Edge e : newEdges) { addEdge(e); } - + return this; } - + /** * Get the DAG name * @return DAG name @@ -432,7 +434,7 @@ void checkAndInferOneToOneParallelism() { newKnownTasksVertices.add(vertex); } } - + // walk through all known source 1-1 edges and infer parallelism // add newly inferred vertices for consideration as known sources // the outer loop will run for every new level of inferring the parallelism @@ -455,19 +457,19 @@ void checkAndInferOneToOneParallelism() { } } } - + // check for inconsistency and errors for (Edge e : edges) { Vertex inputVertex = e.getInputVertex(); Vertex outputVertex = e.getOutputVertex(); - + if (e.getEdgeProperty().getDataMovementType() == DataMovementType.ONE_TO_ONE) { if (inputVertex.getParallelism() != outputVertex.getParallelism()) { // both should be equal or equal to -1. if (outputVertex.getParallelism() != -1) { throw new TezUncheckedException( "1-1 Edge. Destination vertex parallelism must match source vertex. 
" - + "Vertex: " + inputVertex.getName() + " does not match vertex: " + + "Vertex: " + inputVertex.getName() + " does not match vertex: " + outputVertex.getName()); } } @@ -526,7 +528,7 @@ void checkAndInferOneToOneParallelism() { } } } - + // AnnotatedVertex is used by verify() private static class AnnotatedVertex { Vertex v; @@ -572,7 +574,7 @@ Deque verify(boolean restricted) throws IllegalStateException { if (vertices.isEmpty()) { throw new IllegalStateException("Invalid dag containing 0 vertices"); } - + // check for valid vertices, duplicate vertex names, // and prepare for cycle detection Map vertexMap = new HashMap(); @@ -590,14 +592,14 @@ Deque verify(boolean restricted) throws IllegalStateException { for (Edge e : edges) { // Construct structure for cycle detection Vertex inputVertex = e.getInputVertex(); - Vertex outputVertex = e.getOutputVertex(); + Vertex outputVertex = e.getOutputVertex(); List edgeList = edgeMap.get(inputVertex); if (edgeList == null) { edgeList = new ArrayList(); edgeMap.put(inputVertex, edgeList); } edgeList.add(e); - + // Construct map for Input name verification Set inboundSet = inboundVertexMap.get(outputVertex); if (inboundSet == null) { @@ -605,7 +607,7 @@ Deque verify(boolean restricted) throws IllegalStateException { inboundVertexMap.put(outputVertex, inboundSet); } inboundSet.add(inputVertex.getName()); - + // Construct map for Output name verification Set outboundSet = outboundVertexMap.get(inputVertex); if (outboundSet == null) { @@ -617,7 +619,7 @@ Deque verify(boolean restricted) throws IllegalStateException { // check input and output names don't collide with vertex names for (Vertex vertex : vertices.values()) { - for (RootInputLeafOutput + for (RootInputLeafOutput input : vertex.getInputs()) { if (vertexMap.containsKey(input.getName())) { throw new IllegalStateException("Vertex: " @@ -626,7 +628,7 @@ Deque verify(boolean restricted) throws IllegalStateException { + input.getName()); } } - for (RootInputLeafOutput + for (RootInputLeafOutput output : vertex.getOutputs()) { if (vertexMap.containsKey(output.getName())) { throw new IllegalStateException("Vertex: " @@ -640,7 +642,7 @@ Deque verify(boolean restricted) throws IllegalStateException { // Check for valid InputNames for (Entry> entry : inboundVertexMap.entrySet()) { Vertex vertex = entry.getKey(); - for (RootInputLeafOutput + for (RootInputLeafOutput input : vertex.getInputs()) { if (entry.getValue().contains(input.getName())) { throw new IllegalStateException("Vertex: " @@ -654,7 +656,7 @@ Deque verify(boolean restricted) throws IllegalStateException { // Check for valid OutputNames for (Entry> entry : outboundVertexMap.entrySet()) { Vertex vertex = entry.getKey(); - for (RootInputLeafOutput + for (RootInputLeafOutput output : vertex.getOutputs()) { if (entry.getValue().contains(output.getName())) { throw new IllegalStateException("Vertex: " @@ -664,8 +666,8 @@ Deque verify(boolean restricted) throws IllegalStateException { } } } - - + + // Not checking for repeated input names / output names vertex names on the same vertex, // since we only allow 1 at the moment. 
// When additional inputs are supported, this can be chceked easily (and early) @@ -677,16 +679,12 @@ Deque verify(boolean restricted) throws IllegalStateException { if (restricted) { for (Edge e : edges) { - if (e.getEdgeProperty().getDataSourceType() != - DataSourceType.PERSISTED) { + DataSourceType dataSourceType = e.getEdgeProperty().getDataSourceType(); + if (dataSourceType != DataSourceType.PERSISTED && + dataSourceType != DataSourceType.EPHEMERAL) { throw new IllegalStateException( "Unsupported source type on edge. " + e); } - if (e.getEdgeProperty().getSchedulingType() != - SchedulingType.SEQUENTIAL) { - throw new IllegalStateException( - "Unsupported scheduling type on edge. " + e); - } } } @@ -877,13 +875,13 @@ public synchronized DAGPlan createDag(Configuration tezConf, Credentials extraCr groupBuilder.addGroupMembers(v.getName()); } groupBuilder.addAllOutputs(groupInfo.outputs); - for (Map.Entry entry : + for (Map.Entry entry : groupInfo.edgeMergedInputs.entrySet()) { groupBuilder.addEdgeMergedInputs( PlanGroupInputEdgeInfo.newBuilder().setDestVertexName(entry.getKey()). setMergedInput(DagTypeConverters.convertToDAGPlan(entry.getValue()))); } - dagBuilder.addVertexGroups(groupBuilder); + dagBuilder.addVertexGroups(groupBuilder); } } @@ -955,7 +953,7 @@ public synchronized DAGPlan createDag(Configuration tezConf, Credentials extraCr dagCredentials.addAll(dataSink.getCredentials()); } } - + VertexPlan.Builder vertexBuilder = VertexPlan.newBuilder(); vertexBuilder.setName(vertex.getName()); vertexBuilder.setType(PlanVertexType.NORMAL); // vertex type is implicitly NORMAL until TEZ-46. @@ -985,12 +983,7 @@ public synchronized DAGPlan createDag(Configuration tezConf, Credentials extraCr if (vertex.getConf()!= null && vertex.getConf().size() > 0) { ConfigurationProto.Builder confBuilder = ConfigurationProto.newBuilder(); - for (Map.Entry entry : vertex.getConf().entrySet()) { - PlanKeyValuePair.Builder keyValueBuilder = PlanKeyValuePair.newBuilder(); - keyValueBuilder.setKey(entry.getKey()); - keyValueBuilder.setValue(entry.getValue()); - confBuilder.addConfKeyValues(keyValueBuilder); - } + TezUtils.populateConfProtoFromEntries(vertex.getConf().entrySet(), confBuilder); vertexBuilder.setVertexConf(confBuilder); } @@ -1049,7 +1042,7 @@ public synchronized DAGPlan createDag(Configuration tezConf, Credentials extraCr } } } - + if (vertex.getVertexManagerPlugin() != null) { vertexBuilder.setVertexManagerPlugin(DagTypeConverters .convertToDAGPlan(vertex.getVertexManagerPlugin())); @@ -1091,12 +1084,7 @@ public synchronized DAGPlan createDag(Configuration tezConf, Credentials extraCr ConfigurationProto.Builder confProtoBuilder = ConfigurationProto.newBuilder(); if (!this.dagConf.isEmpty()) { - for (Entry entry : this.dagConf.entrySet()) { - PlanKeyValuePair.Builder kvp = PlanKeyValuePair.newBuilder(); - kvp.setKey(entry.getKey()); - kvp.setValue(entry.getValue()); - confProtoBuilder.addConfKeyValues(kvp); - } + TezUtils.populateConfProtoFromEntries(this.dagConf.entrySet(), confProtoBuilder); } // Copy historyLogLevel from tezConf into dagConf if its not overridden in dagConf. 
String logLevel = this.dagConf.get(TezConfiguration.TEZ_HISTORY_LOGGING_LOGLEVEL); diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/DAGNotRunningException.java b/tez-api/src/main/java/org/apache/tez/dag/api/DAGNotRunningException.java index cbc93a9647..93f5d71b53 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/DAGNotRunningException.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/DAGNotRunningException.java @@ -21,7 +21,8 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; /** - * Checked Exception thrown upon error + * Thrown by the AM when the DAG for which the status was queried + * is not running anymore: client can decide further action in this case. */ @Private public class DAGNotRunningException extends TezException { diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java b/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java index c5d9c0b4f7..a55e45a7fd 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/DagTypeConverters.java @@ -90,7 +90,9 @@ import org.apache.tez.serviceplugins.api.ServicePluginsDescriptor; @Private -public class DagTypeConverters { +public final class DagTypeConverters { + + private DagTypeConverters() {} public static PlanLocalResourceVisibility convertToDAGPlan(LocalResourceVisibility visibility){ switch(visibility){ @@ -624,23 +626,24 @@ public static TezCountersProto convertTezCountersToProto( } public static DAGProtos.StatusGetOptsProto convertStatusGetOptsToProto( - StatusGetOpts statusGetOpts) { + StatusGetOpts statusGetOpts) { switch (statusGetOpts) { - case GET_COUNTERS: - return DAGProtos.StatusGetOptsProto.GET_COUNTERS; + case GET_COUNTERS: + return DAGProtos.StatusGetOptsProto.GET_COUNTERS; + case GET_MEMORY_USAGE: + return DAGProtos.StatusGetOptsProto.GET_MEMORY_USAGE; } - throw new TezUncheckedException("Could not convert StatusGetOpts to" - + " proto"); + throw new TezUncheckedException("Could not convert StatusGetOpts to" + " proto"); } - public static StatusGetOpts convertStatusGetOptsFromProto( - DAGProtos.StatusGetOptsProto proto) { + public static StatusGetOpts convertStatusGetOptsFromProto(DAGProtos.StatusGetOptsProto proto) { switch (proto) { - case GET_COUNTERS: - return StatusGetOpts.GET_COUNTERS; + case GET_COUNTERS: + return StatusGetOpts.GET_COUNTERS; + case GET_MEMORY_USAGE: + return StatusGetOpts.GET_MEMORY_USAGE; } - throw new TezUncheckedException("Could not convert to StatusGetOpts from" - + " proto"); + throw new TezUncheckedException("Could not convert to StatusGetOpts from" + " proto"); } public static List convertStatusGetOptsToProto( @@ -735,7 +738,7 @@ public static ByteBuffer convertFromTezUserPayload(@Nullable UserPayload payload if (payload == null) { return null; } - return payload.getPayload(); + return payload.getRawPayload(); } public static VertexExecutionContextProto convertToProto( diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/DataSinkDescriptor.java b/tez-api/src/main/java/org/apache/tez/dag/api/DataSinkDescriptor.java index 4d0d615db4..fec2bf6eed 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/DataSinkDescriptor.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/DataSinkDescriptor.java @@ -21,13 +21,13 @@ import java.net.URI; import java.util.Collection; import java.util.Collections; +import java.util.Objects; import javax.annotation.Nullable; import org.apache.hadoop.classification.InterfaceAudience.Public; import 
org.apache.hadoop.security.Credentials; -import com.google.common.base.Preconditions; import com.google.common.collect.Sets; /** @@ -121,7 +121,7 @@ public OutputDescriptor getOutputDescriptor() { * @return this */ public synchronized DataSinkDescriptor addURIsForCredentials(Collection uris) { - Preconditions.checkNotNull(uris, "URIs cannot be null"); + Objects.requireNonNull(uris, "URIs cannot be null"); urisForCredentials.addAll(uris); return this; } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/DataSourceDescriptor.java b/tez-api/src/main/java/org/apache/tez/dag/api/DataSourceDescriptor.java index 1c5c16db5f..db43c91f7a 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/DataSourceDescriptor.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/DataSourceDescriptor.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Map; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -34,7 +35,6 @@ import org.apache.tez.runtime.api.InputInitializer; import org.apache.tez.runtime.api.events.InputDataInformationEvent; -import com.google.common.base.Preconditions; import com.google.common.collect.Sets; /** @@ -157,7 +157,7 @@ public InputDescriptor getInputDescriptor() { * @return this */ public synchronized DataSourceDescriptor addURIsForCredentials(Collection uris) { - Preconditions.checkNotNull(uris, "URIs cannot be null"); + Objects.requireNonNull(uris, "URIs cannot be null"); urisForCredentials.addAll(uris); return this; } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java index 3d7f2ab2b7..be58354c67 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeManagerPluginOnDemand.java @@ -32,8 +32,8 @@ import org.apache.tez.runtime.api.events.InputReadErrorEvent; /** - * This interface defines the routing of the event between tasks of producer and - * consumer vertices. The routing is bi-directional. Users can customize the + * This interface defines the routing of the event between tasks of producer and + * consumer vertices. The routing is bidirectional. Users can customize the * routing by providing an implementation of this interface. */ @Public @@ -70,22 +70,22 @@ public int getSource() { /** * Class to provide routing metadata for {@link Event}s to be routed between - * producer and consumer tasks. The routing data enabled the system to send + * producer and consumer tasks. The routing data enabled the system to send * the event from the producer task output to the consumer task input */ public static class EventRouteMetadata { private final int numEvents; private final int[] targetIndices; private final int[] sourceIndices; - + /** * Create an {@link EventRouteMetadata} that will create numEvents copies of * the {@link Event} to be routed. Use this to create * {@link EventRouteMetadata} for {@link DataMovementEvent}s or * {@link InputFailedEvent}s where the target input indices must be - * specified to route those events. Typically numEvents would be 1 for these + * specified to route those events. Typically, numEvents would be 1 for these * events. 
- * + * * @param numEvents * Number of copies of the event to be routed * @param targetIndices @@ -97,14 +97,14 @@ public static class EventRouteMetadata { public static EventRouteMetadata create(int numEvents, int[] targetIndices) { return new EventRouteMetadata(numEvents, targetIndices, null); } - + /** * Create an {@link EventRouteMetadata} that will create numEvents copies of * the {@link Event} to be routed. Use this to create * {@link EventRouteMetadata} for {@link CompositeDataMovementEvent} where * the target input indices and source output indices must be specified to - * route those events. Typically numEvents would be 1 for these events. - * + * route those events. Typically, numEvents would be 1 for these events. + * * @param numEvents * Number of copies of the event to be routed * @param targetIndices @@ -157,7 +157,7 @@ public int getNumEvents() { * extending this to create a {@link EdgeManagerPluginOnDemand}, must provide * the same constructor so that Tez can create an instance of the class at * runtime. - * + * * @param context * the context within which this {@link EdgeManagerPluginOnDemand} * will run. Includes information like configuration which the user @@ -177,7 +177,7 @@ public EdgeManagerPluginOnDemand(EdgeManagerPluginContext context) { * @throws Exception */ public abstract void initialize() throws Exception; - + /** * This method will be invoked just before routing of events will begin. The * plugin can use this opportunity to make any runtime initialization's that @@ -187,7 +187,7 @@ public EdgeManagerPluginOnDemand(EdgeManagerPluginContext context) { /** * Get the number of physical inputs on the destination task - * @param destinationTaskIndex Index of destination task for which number of + * @param destinationTaskIndex Index of destination task for which number of * inputs is needed * @return Number of physical inputs on the destination task * @throws Exception @@ -196,34 +196,34 @@ public EdgeManagerPluginOnDemand(EdgeManagerPluginContext context) { /** * Get the number of physical outputs on the source task - * @param sourceTaskIndex Index of the source task for which number of outputs + * @param sourceTaskIndex Index of the source task for which number of outputs * is needed * @return Number of physical outputs on the source task * @throws Exception */ public abstract int getNumSourceTaskPhysicalOutputs(int sourceTaskIndex) throws Exception; - + /** * Get the number of destination tasks that consume data from the source task * @param sourceTaskIndex Source task index * @throws Exception */ public abstract int getNumDestinationConsumerTasks(int sourceTaskIndex) throws Exception; - + /** * Return the source task index to which to send the input error event - * + * * @param destinationTaskIndex * Destination task that reported the error * @param destinationFailedInputIndex - * Index of the physical input on the destination task that reported + * Index of the physical input on the destination task that reported * the error * @return Index of the source task that created the unavailable input * @throws Exception */ public abstract int routeInputErrorEventToSource(int destinationTaskIndex, int destinationFailedInputIndex) throws Exception; - + /** * The method provides the {@link EventRouteMetadata} to route a * {@link DataMovementEvent} produced by the given source task to the given @@ -231,7 +231,7 @@ public abstract int routeInputErrorEventToSource(int destinationTaskIndex, * target input indices set to enable the routing. 
If the routing metadata is * common across different events then the plugin can cache and reuse the same * object. - * + * * @param sourceTaskIndex * The index of the task in the source vertex of this edge that * produced a {@link DataMovementEvent} @@ -254,7 +254,7 @@ public abstract int routeInputErrorEventToSource(int destinationTaskIndex, * the target input indices and source output indices set to enable the * routing. If the routing metadata is common across different events then the * plugin can cache and reuse the same object. - * + * * @param sourceTaskIndex * The index of the task in the source vertex of this edge that * produced a {@link CompositeDataMovementEvent} @@ -275,7 +275,7 @@ public abstract int routeInputErrorEventToSource(int destinationTaskIndex, * target input indices set to enable the routing. If the routing metadata is * common across different events then the plugin can cache and reuse the same * object. - * + * * @param sourceTaskIndex * The index of the failed task in the source vertex of this edge. * @param destinationTaskIndex @@ -287,7 +287,7 @@ public abstract int routeInputErrorEventToSource(int destinationTaskIndex, */ public abstract @Nullable EventRouteMetadata routeInputSourceTaskFailedEventToDestination( int sourceTaskIndex, int destinationTaskIndex) throws Exception; - + /** * Return the {@link org.apache.tez.dag.api.EdgeManagerPluginContext} for this specific instance of * the vertex manager. @@ -305,7 +305,7 @@ public EdgeManagerPluginContext getContext() { * The event will be routed to every destination task index in the key of the * map. Every physical input in the value for that task key will receive the * input. - * + * * @param event * Data movement event that contains the output information * @param sourceTaskIndex @@ -329,7 +329,7 @@ public void routeDataMovementEventToDestination(DataMovementEvent event, * for that task key will receive the failure notification. This method will * be called once for every source task failure and information for all * affected destinations must be provided in that invocation. - * + * * @param sourceTaskIndex * Source task * @param destinationTaskAndInputIndices @@ -341,19 +341,19 @@ public void routeInputSourceTaskFailedEventToDestination(int sourceTaskIndex, /** * Return the source task index to which to send the input error event - * + * * @param event * Input read error event. 
Has more information about the error * @param destinationTaskIndex * Destination task that reported the error * @param destinationFailedInputIndex - * Index of the physical input on the destination task that reported + * Index of the physical input on the destination task that reported * the error * @return Index of the source task that created the unavailable input * @throws Exception */ public int routeInputErrorEventToSource(InputReadErrorEvent event, - int destinationTaskIndex, int destinationFailedInputIndex) throws Exception { + int destinationTaskIndex, int destinationFailedInputIndex) throws Exception { return routeInputErrorEventToSource(destinationTaskIndex, destinationFailedInputIndex); } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/EdgeProperty.java b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeProperty.java index 07fb2c140f..1850060ece 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/EdgeProperty.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/EdgeProperty.java @@ -21,7 +21,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; /** * An @link {@link EdgeProperty} defines the relation between the source and @@ -43,7 +43,7 @@ public class EdgeProperty { */ public enum DataMovementType { /** - * Output on this edge produced by the i-th source task is available to the + * Output on this edge produced by the i-th source task is available to the * i-th destination task. */ ONE_TO_ONE, @@ -58,20 +58,20 @@ public enum DataMovementType { * are gathered by designated destination tasks. */ SCATTER_GATHER, - + /** * Custom routing defined by the user. */ CUSTOM } - + /** * Determines the lifetime of the data produced on this edge by a source task. */ public enum DataSourceType { /** * Data produced by the source is persisted and available even when the - * task is not running. The data may become unavailable and may cause the + * task is not running. The data may become unavailable and may cause the * source task to be re-executed. */ PERSISTED, @@ -82,31 +82,51 @@ public enum DataSourceType { PERSISTED_RELIABLE, /** * Data produced by the source task is available only while the source task - * is running. This requires the destination task to run concurrently with - * the source task. This is not supported yet. + * is running. This requires the destination task to run concurrently with + * the source task. Development in progress. */ @Unstable EPHEMERAL } - + /** - * Determines when the destination task is eligible to run, once the source + * Determines when the destination task is eligible to run, once the source * task is eligible to run. */ public enum SchedulingType { /** - * Destination task is eligible to run after one or more of its source tasks + * Destination task is eligible to run after one or more of its source tasks * have started or completed. */ SEQUENTIAL, /** * Destination task must run concurrently with the source task. - * This is not supported yet. + * Development in progress. */ @Unstable CONCURRENT } - + + /** + * Determines the relevant event(s) that will assist in scheduling downstream vertex + * connected via a edge with CONCURRENT {@link SchedulingType}. 
+ */ + public enum ConcurrentEdgeTriggerType { + /** + * trigger tasks scheduling for downstream vertex(es) upon upstream being configured + * this effectively simultaneously schedules downstream and upstream vertices + * connected on both ends of a concurrent edge. + */ + SOURCE_VERTEX_CONFIGURED, + + /** + * trigger tasks scheduling for downstream vertex(es) by "running" event(s) of upstream tasks + * this will be fully supported with TEZ-3999 + */ + SOURCE_TASK_STARTED + } + + final DataMovementType dataMovementType; final DataSourceType dataSourceType; final SchedulingType schedulingType; @@ -172,7 +192,7 @@ private EdgeProperty(DataMovementType dataMovementType, Preconditions.checkArgument(dataMovementType != DataMovementType.CUSTOM, DataMovementType.CUSTOM + " cannot be used with this constructor"); } - + private EdgeProperty(EdgeManagerPluginDescriptor edgeManagerDescriptor, DataSourceType dataSourceType, @@ -182,7 +202,7 @@ private EdgeProperty(EdgeManagerPluginDescriptor edgeManagerDescriptor, this(edgeManagerDescriptor, DataMovementType.CUSTOM, dataSourceType, schedulingType, edgeSource, edgeDestination); } - + private EdgeProperty(EdgeManagerPluginDescriptor edgeManagerDescriptor, DataMovementType dataMovementType, DataSourceType dataSourceType, SchedulingType schedulingType, OutputDescriptor edgeSource, InputDescriptor edgeDestination) { @@ -193,7 +213,7 @@ private EdgeProperty(EdgeManagerPluginDescriptor edgeManagerDescriptor, this.inputDescriptor = edgeDestination; this.outputDescriptor = edgeSource; } - + /** * Get the {@link DataMovementType} * @return {@link DataMovementType} @@ -201,7 +221,7 @@ private EdgeProperty(EdgeManagerPluginDescriptor edgeManagerDescriptor, public DataMovementType getDataMovementType() { return dataMovementType; } - + /** * Get the {@link DataSourceType} * @return {@link DataSourceType} @@ -209,7 +229,7 @@ public DataMovementType getDataMovementType() { public DataSourceType getDataSourceType() { return dataSourceType; } - + /** * Get the {@link SchedulingType} * @return {@link SchedulingType} @@ -217,30 +237,30 @@ public DataSourceType getDataSourceType() { public SchedulingType getSchedulingType() { return schedulingType; } - + /** * @return the {@link InputDescriptor} which will consume data from the edge. */ public InputDescriptor getEdgeDestination() { return inputDescriptor; } - + /** * @return the {@link OutputDescriptor} which produces data on the edge. */ public OutputDescriptor getEdgeSource() { return outputDescriptor; } - + /** - * Returns the Edge Manager specifications for this edge. + * Returns the Edge Manager specifications for this edge. * @return @link {@link EdgeManagerPluginDescriptor} if a custom edge was setup, null otherwise. */ @Private public EdgeManagerPluginDescriptor getEdgeManagerDescriptor() { return edgeManagerDescriptor; } - + @Override public String toString() { return "{ " + dataMovementType + " : " + inputDescriptor.getClassName() @@ -248,5 +268,5 @@ public String toString() { + " >> " + (edgeManagerDescriptor == null ? 
"NullEdgeManager" : edgeManagerDescriptor.getClassName()) + " }"; } - + } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/EntityDescriptor.java b/tez-api/src/main/java/org/apache/tez/dag/api/EntityDescriptor.java index dcc4ebf779..2c7b834c1c 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/EntityDescriptor.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/EntityDescriptor.java @@ -22,15 +22,15 @@ import java.io.DataOutput; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Objects; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; -import com.google.common.base.Preconditions; - /** * Describes a given user code entity. Consists of the name of the class implementing * the user logic and a payload that can be used to configure an object instance of @@ -65,7 +65,7 @@ public UserPayload getUserPayload() { * @return this object for further chained method calls */ public T setUserPayload(UserPayload userPayload) { - Preconditions.checkNotNull(userPayload); + Objects.requireNonNull(userPayload); this.userPayload = userPayload; return (T) this; } @@ -94,36 +94,40 @@ public String getClassName() { return this.className; } + void writeSingular(DataOutput out, ByteBuffer bb) throws IOException { + out.write(bb.array(), 0, bb.array().length); + } + + void writeSegmented(DataOutput out, ByteBuffer bb) throws IOException { + // This code is just for fallback in case serialization is changed to + // use something other than DataOutputBuffer. + int len; + byte[] buf = new byte[SERIALIZE_BUFFER_SIZE]; + do { + len = Math.min(bb.remaining(), SERIALIZE_BUFFER_SIZE); + bb.get(buf, 0, len); + out.write(buf, 0, len); + } while (bb.remaining() > 0); + } + @Override public void write(DataOutput out) throws IOException { Text.writeString(out, className); // TODO: TEZ-305 - using protobuf serde instead of Writable serde. ByteBuffer bb = DagTypeConverters.convertFromTezUserPayload(userPayload); - if (bb == null) { + if (bb == null || bb.remaining() == 0) { out.writeInt(-1); + return; + } + + // write size + out.writeInt(bb.remaining()); + if (bb.hasArray()) { + writeSingular(out, bb); } else { - int size = bb.remaining(); - if (size == 0) { - out.writeInt(-1); - } else { - out.writeInt(size); - if (out instanceof DataOutputBuffer) { - DataOutputBuffer buf = (DataOutputBuffer) out; - buf.write(new ByteBufferDataInput(bb), size); - } else { - // This code is just for fallback in case serialization is changed to - // use something other than DataOutputBuffer. - int len; - byte[] buf = new byte[SERIALIZE_BUFFER_SIZE]; - do { - len = Math.min(bb.remaining(), SERIALIZE_BUFFER_SIZE); - bb.get(buf, 0, len); - out.write(buf, 0, len); - } while (bb.remaining() > 0); - } - } - out.writeInt(userPayload.getVersion()); + writeSegmented(out, bb); } + out.writeInt(userPayload.getVersion()); } @Override @@ -144,76 +148,4 @@ public String toString() { userPayload == null ? false : userPayload.getPayload() == null ? 
false : true; return "ClassName=" + className + ", hasPayload=" + hasPayload; } - - private static class ByteBufferDataInput implements DataInput { - - private final ByteBuffer bb; - - public ByteBufferDataInput(ByteBuffer bb) { - this.bb = bb; - } - - @Override - public void readFully(byte[] b) throws IOException { - bb.get(b, 0, bb.remaining()); - } - - @Override - public void readFully(byte[] b, int off, int len) throws IOException { - bb.get(b, off, len); - } - - @Override - public int skipBytes(int n) throws IOException { - throw new UnsupportedOperationException(); - } - @Override - public boolean readBoolean() throws IOException { - throw new UnsupportedOperationException(); - } - @Override - public byte readByte() throws IOException { - return bb.get(); - } - @Override - public int readUnsignedByte() throws IOException { - throw new UnsupportedOperationException(); - } - @Override - public short readShort() throws IOException { - throw new UnsupportedOperationException(); - } - @Override - public int readUnsignedShort() throws IOException { - throw new UnsupportedOperationException(); - } - @Override - public char readChar() throws IOException { - throw new UnsupportedOperationException(); - } - @Override - public int readInt() throws IOException { - throw new UnsupportedOperationException(); - } - @Override - public long readLong() throws IOException { - throw new UnsupportedOperationException(); - } - @Override - public float readFloat() throws IOException { - throw new UnsupportedOperationException(); - } - @Override - public double readDouble() throws IOException { - throw new UnsupportedOperationException(); - } - @Override - public String readLine() throws IOException { - throw new UnsupportedOperationException(); - } - @Override - public String readUTF() throws IOException { - throw new UnsupportedOperationException(); - } - } } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/NamedEntityDescriptor.java b/tez-api/src/main/java/org/apache/tez/dag/api/NamedEntityDescriptor.java index 426d4eb1b0..86ae26c876 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/NamedEntityDescriptor.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/NamedEntityDescriptor.java @@ -17,8 +17,8 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Objects; -import com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; @SuppressWarnings("unchecked") @@ -28,8 +28,7 @@ public class NamedEntityDescriptor> extends E @InterfaceAudience.Private public NamedEntityDescriptor(String entityName, String className) { super(className); - Preconditions.checkArgument(entityName != null, "EntityName must be specified"); - this.entityName = entityName; + this.entityName = Objects.requireNonNull(entityName, "EntityName must be specified"); } public String getEntityName() { diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/NoCurrentDAGException.java b/tez-api/src/main/java/org/apache/tez/dag/api/NoCurrentDAGException.java new file mode 100644 index 0000000000..26ef89abab --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/dag/api/NoCurrentDAGException.java @@ -0,0 +1,39 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.tez.dag.api; + +import org.apache.hadoop.classification.InterfaceAudience.Private; + +/** + * Fatal exception: thrown by the AM if there is no DAG running when + * a DAG's status is queried. This differs from {@link org.apache.tez.dag.api.DAGNotRunningException} + * in that this exception is fatal: the client may consider the DAG failed, because + * it is asking for the status of a DAG from an AM that is not currently running one. This scenario is possible when + * an AM is restarted and the DagClient fails to realize it is asking for the status of a possibly lost DAG. + */ +@Private +public class NoCurrentDAGException extends TezException { + private static final long serialVersionUID = 6337442733802964448L; + + public static final String MESSAGE_PREFIX = "No running DAG at present"; + + public NoCurrentDAGException(String dagId) { + super(MESSAGE_PREFIX + ": " + dagId); + } +} diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TaskLocationHint.java b/tez-api/src/main/java/org/apache/tez/dag/api/TaskLocationHint.java index d1a1359977..3070c338cc 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/TaskLocationHint.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/TaskLocationHint.java @@ -20,12 +20,13 @@ import java.util.Collections; import java.util.Set; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Unstable; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; /** * Describes the placements hints for tasks.
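Editor's note: a minimal sketch (not part of the diff) of how a client could treat the new fatal exception differently from the pre-existing DAGNotRunningException; the helper class and method names are hypothetical, not part of the Tez API.

    import java.io.IOException;
    import org.apache.tez.dag.api.DAGNotRunningException;
    import org.apache.tez.dag.api.NoCurrentDAGException;
    import org.apache.tez.dag.api.TezException;
    import org.apache.tez.dag.api.client.DAGClient;
    import org.apache.tez.dag.api.client.DAGStatus;

    final class DagStatusProbe { // hypothetical helper
      static DAGStatus probe(DAGClient client) throws IOException, TezException {
        try {
          return client.getDAGStatus(null);
        } catch (NoCurrentDAGException e) {
          // Fatal: the (possibly restarted) AM has no trace of the DAG; treat it as lost.
          throw e;
        } catch (DAGNotRunningException e) {
          // Transient by comparison: the DAG is simply not running; the caller may retry.
          return null;
        }
      }
      private DagStatusProbe() {}
    }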
@@ -67,7 +68,7 @@ public String toString() { private TaskBasedLocationAffinity affinitizedTask; private TaskLocationHint(String vertexName, int taskIndex) { - Preconditions.checkNotNull(vertexName); + Objects.requireNonNull(vertexName); Preconditions.checkArgument(taskIndex >= 0); this.affinitizedTask = new TaskBasedLocationAffinity(vertexName, taskIndex); } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java index 39688d68bc..8862f4b7d6 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java @@ -26,22 +26,25 @@ import java.util.Map; import java.util.Set; -import org.apache.tez.common.annotation.ConfigurationClass; -import org.apache.tez.common.annotation.ConfigurationProperty; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.tez.common.TezCommonUtils; +import org.apache.tez.common.annotation.ConfigurationClass; +import org.apache.tez.common.annotation.ConfigurationProperty; +import org.apache.tez.dag.api.EdgeProperty.ConcurrentEdgeTriggerType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; /** - * Defines the configurations for Tez. These configurations are typically specified in + * Defines the configurations for Tez. These configurations are typically specified in * tez-site.xml on the client machine where TezClient is used to launch the Tez application. * tez-site.xml is expected to be picked up from the classpath of the client process. * @see Detailed Configuration Information @@ -55,7 +58,7 @@ public class TezConfiguration extends Configuration { private final static Logger LOG = LoggerFactory.getLogger(TezConfiguration.class); - private static Map PropertyScope = new HashMap(); + private static Map PropertyScope = new HashMap<>(); static { Configuration.addDeprecation("tez.am.counters.max.keys", TezConfiguration.TEZ_COUNTERS_MAX); @@ -131,7 +134,7 @@ public TezConfiguration(boolean loadDefaults) { public static final String TEZ_TASK_PREFIX = TEZ_PREFIX + "task."; /** - * Boolean value. If true then Tez will try to automatically delete temporary job + * Boolean value. If true then Tez will try to automatically delete temporary job * artifacts that it creates within the specified staging dir. Does not affect any user data. */ @ConfigurationScope(Scope.AM) @@ -183,7 +186,7 @@ public TezConfiguration(boolean loadDefaults) { + "use.concurrent-dispatcher"; @Private public static boolean TEZ_AM_USE_CONCURRENT_DISPATCHER_DEFAULT = false; - + @Private @ConfigurationScope(Scope.AM) public static final String TEZ_AM_CONCURRENT_DISPATCHER_CONCURRENCY = TEZ_AM_PREFIX @@ -191,12 +194,23 @@ public TezConfiguration(boolean loadDefaults) { @Private public static final int TEZ_AM_CONCURRENT_DISPATCHER_CONCURRENCY_DEFAULT = 10; + /** + * Integer value. Milliseconds while AsyncDispatcher should wait for events to be processed on + * serviceStop. The idea is borrowed from YARN-3999. 
+ */ + @Private + @ConfigurationScope(Scope.AM) + public static final String TEZ_AM_DISPATCHER_DRAIN_EVENTS_TIMEOUT = TEZ_AM_PREFIX + + "dispatcher.drain-events.timeout"; + @Private + public static final int TEZ_AM_DISPATCHER_DRAIN_EVENTS_TIMEOUT_DEFAULT = 10000; + /** * Boolean value. Execution mode for the Tez application. True implies session mode. If the client * code is written according to best practices then the same code can execute in either mode based * on this configuration. Session mode is more aggressive in reserving execution resources and is * typically used for interactive applications where multiple DAGs are submitted in quick succession - * by the same user. For long running applications, one-off executions, batch jobs etc non-session + * by the same user. For long running applications, one-off executions, batch jobs etc non-session * mode is recommended. If session mode is enabled then container reuse is recommended. */ @ConfigurationScope(Scope.AM) @@ -246,6 +260,51 @@ public TezConfiguration(boolean loadDefaults) { public static final String TEZ_TASK_LOG_LEVEL = TEZ_TASK_PREFIX + "log.level"; public static final String TEZ_TASK_LOG_LEVEL_DEFAULT = "INFO"; + /** + * With this option, the user can override the logging pattern which is applied in + * TezContainerLogAppender in the AM, regardless of the environment settings. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_LOG_PATTERN_LAYOUT_AM = TEZ_AM_PREFIX + "log.pattern.layout"; + + /** + * With this option, the user can override the logging pattern which is applied in + * TezContainerLogAppender in tasks, regardless of the environment settings. + */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty + public static final String TEZ_LOG_PATTERN_LAYOUT_TASK = TEZ_TASK_PREFIX + "log.pattern.layout"; + + /** + * Set pattern to empty string to turn the custom log pattern feature off. + */ + public static final String TEZ_LOG_PATTERN_LAYOUT_DEFAULT = ""; + + /** + * Comma separated list of keys, which can be used for defining keys in MDC. The corresponding values + * will be read from Configuration, see tez.mdc.custom.keys.conf.props for further details. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_MDC_CUSTOM_KEYS = TEZ_PREFIX + "mdc.custom.keys"; + + /** + * Comma separated list of Configuration keys. Tez will try to fill MDC with key value pairs in a + * way that a key will be the nth item in tez.mdc.custom.keys and the value will be the value from + * a Configuration object pointed by the nth key of tez.mdc.custom.keys.conf.props like below: + * + * tez.mdc.custom.keys=queryId,otherKey + * tez.mdc.custom.keys.conf.props=awesome.sql.app.query.id,awesome.sql.app.other.key + * + * So MDC will contain key -{@literal >} value pairs as: + * queryId -{@literal >} conf.get("awesome.sql.app.query.id") + * otherKey -{@literal >} conf.get("awesome.sql.app.other.key") + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_MDC_CUSTOM_KEYS_CONF_PROPS = TEZ_MDC_CUSTOM_KEYS + ".conf.props"; + /** * double value. Represents ratio of unique failed outputs / number of consumer * tasks. When this condition or value mentioned in {@link @@ -271,12 +330,12 @@ public TezConfiguration(boolean loadDefaults) { public static final int TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES_DEFAULT = 10; /** - * int value.
Represents the maximum time in seconds for which a consumer attempt can report - * a read error against its producer attempt, after which the producer attempt will be re-run - * to re-generate the output. There are other heuristics which determine the retry and mainly - * try to guard against a flurry of re-runs due to intermittent read errors + * int value. Represents the maximum time in seconds for which a consumer attempt can report + * a read error against its producer attempt, after which the producer attempt will be re-run + * to re-generate the output. There are other heuristics which determine the retry and mainly + * try to guard against a flurry of re-runs due to intermittent read errors * (due to network issues). This configuration puts a time limit on those heuristics to ensure - * jobs dont hang indefinitely due to lack of closure in those heuristics + * jobs don't hang indefinitely due to lack of closure in those heuristics * * Expert level setting. */ @@ -286,11 +345,29 @@ public TezConfiguration(boolean loadDefaults) { TEZ_AM_PREFIX + "max.allowed.time-sec.for-read-error"; public static final int TEZ_AM_MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC_DEFAULT = 300; + /** + * Double value. Assuming that a certain number of downstream hosts reported fetch failure for a + * given upstream host, this config drives the max allowed ratio of (downstream hosts) / (all hosts). + * The total number of used hosts is tracked by AMNodeTracker, which divides the distinct number of + * downstream hosts blaming source (upstream) tasks in a given vertex. If the fraction is beyond this + * limit, the upstream task attempt is marked as failed (so blamed for the fetch failure). + * E.g. if this is set to 0.2, in case of 3 different hosts reporting fetch failure + * for the same upstream host in a cluster which currently utilizes 10 nodes, the upstream task + * is immediately blamed for the fetch failure. + * + * Expert level setting. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="double") + public static final String TEZ_AM_MAX_ALLOWED_DOWNSTREAM_HOST_FAILURES_FRACTION = + TEZ_AM_PREFIX + "max.allowed.downstream.host.failures.fraction"; + public static final double TEZ_AM_MAX_ALLOWED_DOWNSTREAM_HOST_FAILURES_FRACTION_DEFAULT = 0.2; + /** * Boolean value. Determines when the final outputs to data sinks are committed. Commit is an - * output specific operation and typically involves making the output visible for consumption. - * If the config is true, then the outputs are committed at the end of DAG completion after all - * constituent vertices have completed. If false, outputs for each vertex are committed after that + * output specific operation and typically involves making the output visible for consumption. + * If the config is true, then the outputs are committed at the end of DAG completion after all + * constituent vertices have completed. If false, outputs for each vertex are committed after that * vertex succeeds. Depending on the desired output visibility and downstream consumer dependencies * this value must be appropriately chosen. Defaults to the safe choice of true.
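Editor's note: a hedged sketch of the fraction check described above, reduced to its arithmetic (the real bookkeeping lives in AMNodeTracker; the helper below is illustrative only).

    // E.g. 3 blaming downstream hosts on a cluster currently using 10 nodes:
    // 3 / 10.0 = 0.3 >= 0.2, so the upstream attempt is blamed for the fetch failure.
    static boolean shouldBlameUpstream(int blamingDownstreamHosts, int usedHosts,
        double maxAllowedFraction) {
      return (double) blamingDownstreamHosts / usedHosts >= maxAllowedFraction;
    }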
*/ @@ -330,8 +407,11 @@ public TezConfiguration(boolean loadDefaults) { @ConfigurationScope(Scope.AM) @ConfigurationProperty public static final String TEZ_AM_LAUNCH_CMD_OPTS = TEZ_AM_PREFIX + "launch.cmd-opts"; - public static final String TEZ_AM_LAUNCH_CMD_OPTS_DEFAULT = + public static final String TEZ_AM_LAUNCH_CMD_OPTS_JDK8_DEFAULT = "-XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseParallelGC"; + public static final String TEZ_AM_LAUNCH_CMD_OPTS_JDK9_DEFAULT = + "-verbose:gc -Xlog:gc*,safepoint::time,uptime -XX:+UseNUMA -XX:+UseParallelGC"; + public static final String TEZ_AM_LAUNCH_CMD_OPTS_DEFAULT; /** * String value. Command line options which will be prepended to {@link @@ -355,8 +435,21 @@ public TezConfiguration(boolean loadDefaults) { @ConfigurationProperty public static final String TEZ_TASK_LAUNCH_CMD_OPTS = TEZ_TASK_PREFIX + "launch.cmd-opts"; - public static final String TEZ_TASK_LAUNCH_CMD_OPTS_DEFAULT = + public static final String TEZ_TASK_LAUNCH_CMD_OPTS_JDK8_DEFAULT = "-XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseParallelGC"; + public static final String TEZ_TASK_LAUNCH_CMD_OPTS_JDK9_DEFAULT = + "-verbose:gc -Xlog:gc*,safepoint::time,uptime -XX:+UseNUMA -XX:+UseParallelGC"; + public static final String TEZ_TASK_LAUNCH_CMD_OPTS_DEFAULT; + + static { + if (TezCommonUtils.getJavaVersion() >= 9) { + TEZ_AM_LAUNCH_CMD_OPTS_DEFAULT = TEZ_AM_LAUNCH_CMD_OPTS_JDK9_DEFAULT; + TEZ_TASK_LAUNCH_CMD_OPTS_DEFAULT = TEZ_TASK_LAUNCH_CMD_OPTS_JDK9_DEFAULT; + } else { + TEZ_AM_LAUNCH_CMD_OPTS_DEFAULT = TEZ_AM_LAUNCH_CMD_OPTS_JDK8_DEFAULT; + TEZ_TASK_LAUNCH_CMD_OPTS_DEFAULT = TEZ_TASK_LAUNCH_CMD_OPTS_JDK8_DEFAULT; + } + } /** * Double value. Tez automatically determines the Xmx for the JVMs used to run @@ -408,6 +501,19 @@ public TezConfiguration(boolean loadDefaults) { + "launch.env"; public static final String TEZ_AM_LAUNCH_ENV_DEFAULT = ""; + /** + * String value. In the presence of concurrent input edge to a vertex, this describes + * the timing of scheduling downstream vertex tasks. It may be closely related to the + * type of event that will contribute to a scheduling decision. + */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty + public static final String TEZ_CONCURRENT_EDGE_TRIGGER_TYPE = + TEZ_TASK_PREFIX + "concurrent.edge.trigger.type"; + public static final String TEZ_CONCURRENT_EDGE_TRIGGER_TYPE_DEFAULT = + ConcurrentEdgeTriggerType.SOURCE_VERTEX_CONFIGURED.name(); + + /** * String value. Env settings will be merged with {@link #TEZ_TASK_LAUNCH_ENV} * during the launch of the task process. This property will typically be configured to @@ -508,16 +614,16 @@ public TezConfiguration(boolean loadDefaults) { @Unstable /** - * Boolean value. Enable speculative execution of slower tasks. This can help reduce job latency + * Boolean value. Enable speculative execution of slower tasks. This can help reduce job latency * when some tasks are running slower due bad/slow machines */ @ConfigurationScope(Scope.VERTEX) // TODO Verify the vertex speculation, TEZ-1788 @ConfigurationProperty(type="boolean") public static final String TEZ_AM_SPECULATION_ENABLED = TEZ_AM_PREFIX + "speculation.enabled"; public static final boolean TEZ_AM_SPECULATION_ENABLED_DEFAULT = false; - + /** - * Float value. Specifies how many standard deviations away from the mean task execution time + * Float value. Specifies how many standard deviations away from the mean task execution time * should be considered as an outlier/slow task. 
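Editor's note: the static block above selects the GC defaults by the JDK major version at class-load time. A plausible sketch of the probe it relies on (the actual TezCommonUtils.getJavaVersion implementation may differ):

    // "java.specification.version" is "1.8" on JDK 8, and "9", "11", "17", ... afterwards.
    static int getJavaVersion() {
      String spec = System.getProperty("java.specification.version");
      return spec.startsWith("1.") ? Integer.parseInt(spec.substring(2))
                                   : Integer.parseInt(spec);
    }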
*/ @Unstable @@ -526,16 +632,129 @@ public TezConfiguration(boolean loadDefaults) { public static final String TEZ_AM_LEGACY_SPECULATIVE_SLOWTASK_THRESHOLD = TEZ_AM_PREFIX + "legacy.speculative.slowtask.threshold"; + /** + * Long value. Specifies the timeout after which tasks on a single task vertex must be speculated. + * A negative value means not to use timeout for speculation of single task vertices. + */ + @Unstable + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="long") + public static final String TEZ_AM_LEGACY_SPECULATIVE_SINGLE_TASK_VERTEX_TIMEOUT = + TEZ_AM_PREFIX + "legacy.speculative.single.task.vertex.timeout"; + public static final long TEZ_AM_LEGACY_SPECULATIVE_SINGLE_TASK_VERTEX_TIMEOUT_DEFAULT = -1; + + @Private + public static final String TEZ_SPECULATOR_PREFIX = TEZ_AM_PREFIX + "speculator."; + @Private + public static final String TEZ_ESTIMATOR_PREFIX = TEZ_AM_PREFIX + "task.estimator."; + /** + * Long value. Specifies the amount of time (in ms) that needs to elapse to do the next round of + * speculation if there is no task speculated in this round. + */ + @Unstable + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type="long") + public static final String TEZ_AM_SOONEST_RETRY_AFTER_NO_SPECULATE = + TEZ_AM_PREFIX + "soonest.retry.after.no.speculate"; + public static final long TEZ_AM_SOONEST_RETRY_AFTER_NO_SPECULATE_DEFAULT = 1000L * 1L; + + /** + * Long value. Specifies the amount of time (in ms) that needs to elapse to do the next round of + * speculation if there are tasks speculated in this round. + */ + @Unstable + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type="long") + public static final String TEZ_AM_SOONEST_RETRY_AFTER_SPECULATE = + TEZ_AM_PREFIX + "soonest.retry.after.speculate"; + public static final long TEZ_AM_SOONEST_RETRY_AFTER_SPECULATE_DEFAULT = 1000L * 15L; + + /** The class that should be used for speculative execution calculations. */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty + public static final String TEZ_AM_SPECULATOR_CLASS = + TEZ_SPECULATOR_PREFIX + "class"; + /** The class that should be used for task runtime estimation. */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty + public static final String TEZ_AM_TASK_ESTIMATOR_CLASS = + TEZ_ESTIMATOR_PREFIX + "class"; + /** + * Long value. Specifies the lambda value (in ms) used in the + * smoothing function of the task estimator. + */ + @Unstable + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type="long") + public static final String TEZ_AM_ESTIMATOR_EXPONENTIAL_LAMBDA_MS = + TEZ_ESTIMATOR_PREFIX + "exponential.lambda.ms"; + public static final long TEZ_AM_ESTIMATOR_EXPONENTIAL_LAMBDA_MS_DEFAULT = + TimeUnit.SECONDS.toMillis(120); + + /** + * The window length (in ms) in the simple exponential smoothing after which a + * task attempt is considered stagnated. + */ + @Unstable + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type="long") + public static final String TEZ_AM_ESTIMATOR_EXPONENTIAL_STAGNATED_MS = + TEZ_ESTIMATOR_PREFIX + "exponential.stagnated.ms"; + public static final long TEZ_AM_ESTIMATOR_EXPONENTIAL_STAGNATED_MS_DEFAULT = + TimeUnit.SECONDS.toMillis(360); + + /** + * The number of initial readings that the estimator ignores before giving a + * prediction.
At the beginning the smooth estimator won't be accurate in + * prediction + */ + @Unstable + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type="integer") + public static final String TEZ_AM_ESTIMATOR_EXPONENTIAL_SKIP_INITIALS = + TEZ_ESTIMATOR_PREFIX + "exponential.skip.initials"; + public static final int TEZ_AM_ESTIMATOR_EXPONENTIAL_SKIP_INITIALS_DEFAULT = 24; + + /** + * Double value. The max percent (0-1) of running tasks that can be speculatively re-executed at any time. + */ + @Unstable + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type="double") + public static final String TEZ_AM_PROPORTION_RUNNING_TASKS_SPECULATABLE = + TEZ_AM_PREFIX + "proportion.running.tasks.speculatable"; + public static final double TEZ_AM_PROPORTION_RUNNING_TASKS_SPECULATABLE_DEFAULT = 0.1; + + /** + * Double value. The max percent (0-1) of all tasks that can be speculatively re-executed at any time. + */ + @Unstable + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type="double") + public static final String TEZ_AM_PROPORTION_TOTAL_TASKS_SPECULATABLE = + TEZ_AM_PREFIX + "proportion.total.tasks.speculatable"; + public static final double TEZ_AM_PROPORTION_TOTAL_TASKS_SPECULATABLE_DEFAULT = 0.01; + + /** + * Integer value. The minimum allowed tasks that can be speculatively re-executed at any time. + */ + @Unstable + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type="integer") + public static final String TEZ_AM_MINIMUM_ALLOWED_SPECULATIVE_TASKS = + TEZ_AM_PREFIX + "minimum.allowed.speculative.tasks"; + public static final int TEZ_AM_MINIMUM_ALLOWED_SPECULATIVE_TASKS_DEFAULT = 10; + /** * Int value. Upper limit on the number of threads user to launch containers in the app - * master. Expert level setting. + * master. Expert level setting. */ @ConfigurationScope(Scope.AM) @ConfigurationProperty(type="integer") public static final String TEZ_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT = TEZ_AM_PREFIX + "containerlauncher.thread-count-limit"; - public static final int TEZ_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT_DEFAULT = + public static final int TEZ_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT_DEFAULT = 500; @@ -549,8 +768,8 @@ public TezConfiguration(boolean loadDefaults) { public static final int TEZ_AM_MAX_TASK_FAILURES_PER_NODE_DEFAULT = 10; /** - * Int value. Specifies the number of times the app master can be launched in order to recover - * from app master failure. Typically app master failures are non-recoverable. This parameter + * Int value. Specifies the number of times the app master can be launched in order to recover + * from app master failure. Typically app master failures are non-recoverable. This parameter * is for cases where the app master is not at fault but is lost due to system errors. * Expert level setting. */ @@ -571,7 +790,7 @@ public TezConfiguration(boolean loadDefaults) { public static final int TEZ_AM_VERTEX_MAX_TASK_CONCURRENCY_DEFAULT = -1; /** - * Int value. The maximum number of attempts that can fail for a particular task before the task is failed. + * Int value. The maximum number of attempts that can fail for a particular task before the task is failed. * This does not count killed attempts. Task failure results in DAG failure. */ @ConfigurationScope(Scope.VERTEX) @@ -580,9 +799,19 @@ public TezConfiguration(boolean loadDefaults) { TEZ_AM_PREFIX + "task.max.failed.attempts"; public static final int TEZ_AM_TASK_MAX_FAILED_ATTEMPTS_DEFAULT = 4; + /** + * Int value. 
The maximum number of attempts that can run for a particular task before the task is + * failed. This counts every attempt, including failed and killed attempts. Task failure results in + * DAG failure. Default is 0, which disables this feature. + */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type = "integer") + public static final String TEZ_AM_TASK_MAX_ATTEMPTS = TEZ_AM_PREFIX + "task.max.attempts"; + public static final int TEZ_AM_TASK_MAX_ATTEMPTS_DEFAULT = 0; + /** * Boolean value. Specifies whether a re-scheduled attempt of a task, caused by previous - * failures gets special treatment - higher priority, dropped location hints. + * failures gets higher priority. */ @ConfigurationScope(Scope.VERTEX) @ConfigurationProperty(type="boolean") @@ -591,7 +820,17 @@ public TezConfiguration(boolean loadDefaults) { public static final boolean TEZ_AM_TASK_RESCHEDULE_HIGHER_PRIORITY_DEFAULT=true; /** - * Boolean value. Enabled blacklisting of nodes of nodes that are considered faulty. These nodes + * Boolean value. Specifies whether a re-scheduled attempt of a task, caused by previous + * failures, gets relaxed locality. + */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type="boolean") + public static final String TEZ_AM_TASK_RESCHEDULE_RELAXED_LOCALITY = + TEZ_AM_PREFIX + "task.reschedule.relaxed.locality"; + public static final boolean TEZ_AM_TASK_RESCHEDULE_RELAXED_LOCALITY_DEFAULT = true; + + /** + * Boolean value. Enables blacklisting of nodes that are considered faulty. These nodes * will not be used to execute tasks. */ @ConfigurationScope(Scope.AM) @@ -599,11 +838,11 @@ public TezConfiguration(boolean loadDefaults) { public static final String TEZ_AM_NODE_BLACKLISTING_ENABLED = TEZ_AM_PREFIX + "node-blacklisting.enabled"; public static final boolean TEZ_AM_NODE_BLACKLISTING_ENABLED_DEFAULT = true; - + /** * Int value. Specifies the percentage of nodes in the cluster that may be considered faulty. - * This limits the number of nodes that are blacklisted in an effort to minimize the effects of - * temporary surges in failures (e.g. due to network outages). + * This limits the number of nodes that are blacklisted in an effort to minimize the effects of + * temporary surges in failures (e.g. due to network outages). */ @ConfigurationScope(Scope.AM) @ConfigurationProperty(type="integer") @@ -630,7 +869,7 @@ public TezConfiguration(boolean loadDefaults) { public static final String TEZ_AM_CLIENT_THREAD_COUNT = TEZ_AM_PREFIX + "client.am.thread-count"; public static final int TEZ_AM_CLIENT_THREAD_COUNT_DEFAULT = 2; - + /** * String value. Range of ports that the AM can use when binding for client connections. Leave blank * to use all possible ports. Expert level setting. It's hadoop standard range configuration. @@ -659,6 +898,26 @@ public TezConfiguration(boolean loadDefaults) { public static final String TEZ_AM_DAG_SCHEDULER_CLASS_DEFAULT = "org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrder"; + /** + * String value. The class to be used for the YARN task scheduler. Expert level setting. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_AM_YARN_SCHEDULER_CLASS = TEZ_AM_PREFIX + "yarn.scheduler.class"; + public static final String TEZ_AM_YARN_SCHEDULER_CLASS_DEFAULT = + "org.apache.tez.dag.app.rm.DagAwareYarnTaskScheduler"; + + /** + * Int value. The AM waits this amount of time when the first DAG is submitted but not all the services are ready.
+ * This can happen when the client RPC handler is up and able to accept DAGs but e.g. task scheduler + * manager is not ready (e.g. a task scheduler is waiting for external resources). + * A value equal to or less than 0 is not supported and leads to an exception. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_AM_READY_FOR_SUBMIT_TIMEOUT_MS = TEZ_AM_PREFIX + "ready.for.submit.timeout.ms"; + public static final int TEZ_AM_READY_FOR_SUBMIT_TIMEOUT_MS_DEFAULT = 30000; + /** Int value. The amount of memory in MB to be used by the AppMaster */ @ConfigurationScope(Scope.AM) @ConfigurationProperty(type="integer") @@ -681,7 +940,33 @@ public TezConfiguration(boolean loadDefaults) { public static final boolean TEZ_AM_DAG_CLEANUP_ON_COMPLETION_DEFAULT = false; /** - * Int value. Upper limit on the number of threads used to delete DAG directories on nodes. + * Integer value. Instructs AM to delete vertex shuffle data if a vertex and all its + * child vertices at a certain depth are completed. A value less than or equal to 0 indicates that the feature + * is disabled. + * Let's say we have a dag Map1 - Reduce2 - Reduce3 - Reduce4. + * case 1: height = 1 + * when Reduce2 completes, all the shuffle data of Map1 will be deleted, and so on for the other vertices. + * case 2: height = 2 + * when Reduce3 completes, all the shuffle data of Map1 will be deleted, and so on for the other vertices. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="integer") + public static final String TEZ_AM_VERTEX_CLEANUP_HEIGHT = TEZ_AM_PREFIX + + "vertex.cleanup.height"; + public static final int TEZ_AM_VERTEX_CLEANUP_HEIGHT_DEFAULT = 0; + + /** + * Boolean value. Instructs AM to delete intermediate attempt data for failed task attempts. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="boolean") + public static final String TEZ_AM_TASK_ATTEMPT_CLEANUP_ON_FAILURE = TEZ_AM_PREFIX + + "task.attempt.cleanup.on.failure"; + public static final boolean TEZ_AM_TASK_ATTEMPT_CLEANUP_ON_FAILURE_DEFAULT = false; + + /** + * Int value. Upper limit on the number of threads used to delete DAG directories, + * vertex directories and failed task attempt directories on nodes. */ @ConfigurationScope(Scope.AM) @ConfigurationProperty(type="integer") @@ -690,8 +975,18 @@ public TezConfiguration(boolean loadDefaults) { public static final int TEZ_AM_DAG_CLEANUP_THREAD_COUNT_LIMIT_DEFAULT = 10; + /** + * Int value. Upper limit on the number of threads used by app context (vertex management and input init events). + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="integer") + public static final String TEZ_AM_DAG_APPCONTEXT_THREAD_COUNT_LIMIT = + TEZ_AM_PREFIX + "dag.appcontext.thread-count-limit"; + + public static final int TEZ_AM_DAG_APPCONTEXT_THREAD_COUNT_LIMIT_DEFAULT = 10; + /** Int value. The amount of memory in MB to be used by tasks. This applies to all tasks across - * all vertices. Setting it to the same value for all tasks is helpful for container reuse and + * all vertices. Setting it to the same value for all tasks is helpful for container reuse and * thus good for performance typically.
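Editor's note: a short usage illustration of the vertex cleanup height introduced above, using the Map1 - Reduce2 - Reduce3 - Reduce4 chain from the javadoc.

    TezConfiguration conf = new TezConfiguration();
    // height 1: Map1's shuffle data is deleted as soon as Reduce2 completes;
    // height 2: Map1's shuffle data is kept until Reduce3 completes.
    conf.setInt(TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT, 1);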
*/ @ConfigurationScope(Scope.DAG) // TODO vertex level @ConfigurationProperty(type="integer") @@ -706,7 +1001,7 @@ public TezConfiguration(boolean loadDefaults) { @ConfigurationProperty(type="integer") public static final String TEZ_TASK_RESOURCE_CPU_VCORES = TEZ_TASK_PREFIX + "resource.cpu.vcores"; - public static final int TEZ_TASK_RESOURCE_CPU_VCORES_DEFAULT = 1; + public static final int TEZ_TASK_RESOURCE_CPU_VCORES_DEFAULT = 1; /** * Int value. The maximum heartbeat interval between the AM and RM in milliseconds @@ -721,7 +1016,7 @@ public TezConfiguration(boolean loadDefaults) { /** * Int value. The maximum amount of time, in milliseconds, to wait before a task asks an - * AM for another task. Increasing this can help improve app master scalability for a large + * AM for another task. Increasing this can help improve app master scalability for a large * number of concurrent tasks. Expert level setting. */ @ConfigurationScope(Scope.AM) @@ -731,7 +1026,7 @@ public TezConfiguration(boolean loadDefaults) { public static final int TEZ_TASK_GET_TASK_SLEEP_INTERVAL_MS_MAX_DEFAULT = 200; /** - * Int value. The maximum heartbeat interval, in milliseconds, between the app master and tasks. + * Int value. The maximum heartbeat interval, in milliseconds, between the app master and tasks. * Increasing this can help improve app master scalability for a large number of concurrent tasks. * Expert level setting. */ @@ -742,8 +1037,8 @@ public TezConfiguration(boolean loadDefaults) { public static final int TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS_DEFAULT = 100; /** - * Int value. Interval, in milliseconds, after which counters are sent to AM in heartbeat from - * tasks. This reduces the amount of network traffice between AM and tasks to send high-volume + * Int value. Interval, in milliseconds, after which counters are sent to AM in heartbeat from + * tasks. This reduces the amount of network traffic between AM and tasks to send high-volume * counters. Improves AM scalability. Expert level setting. */ @ConfigurationScope(Scope.AM) @@ -762,7 +1057,7 @@ public TezConfiguration(boolean loadDefaults) { public static final String TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT = TEZ_TASK_PREFIX + "max-events-per-heartbeat"; public static final int TEZ_TASK_MAX_EVENTS_PER_HEARTBEAT_DEFAULT = 500; - + /** * Int value. Maximum number of pending task events before a task will stop * asking for more events in the task heartbeat. @@ -797,16 +1092,16 @@ public TezConfiguration(boolean loadDefaults) { public static final boolean TEZ_TASK_INITIALIZE_PROCESSOR_IO_SERIALLY_DEFAULT = false; /** - * Long value. Interval, in milliseconds, within which any of the tasks Input/Processor/Output - * components need to make successive progress notifications. If the progress is not notified + * Long value. Interval, in milliseconds, within which any of the tasks Input/Processor/Output - * components need to make successive progress notifications. If the progress is not notified * for this interval then the task will be considered hung and terminated. - * The value for this config should be larger than {@link TezConfiguration#TASK_HEARTBEAT_TIMEOUT_MS} + * The value for this config should be larger than {@link TezConfiguration#TASK_HEARTBEAT_TIMEOUT_MS} * and larger than 2 times the value of {@link TezConfiguration#TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS}. * A config value <=0 disables this.
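Editor's note: the constraint spelled out above can be stated mechanically; a minimal sketch (illustrative names, not Tez code):

    // A stuck-progress interval <= 0 disables the check; otherwise it should exceed
    // the task heartbeat timeout and twice the task-AM heartbeat interval.
    static boolean isValidProgressStuckInterval(long stuckMs, long heartbeatTimeoutMs,
        long taskAmHeartbeatMs) {
      return stuckMs <= 0
          || (stuckMs > heartbeatTimeoutMs && stuckMs > 2 * taskAmHeartbeatMs);
    }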
*/ @ConfigurationScope(Scope.VERTEX) @ConfigurationProperty - public static final String TEZ_TASK_PROGRESS_STUCK_INTERVAL_MS = TEZ_TASK_PREFIX + + public static final String TEZ_TASK_PROGRESS_STUCK_INTERVAL_MS = TEZ_TASK_PREFIX + "progress.stuck.interval-ms"; public static final long TEZ_TASK_PROGRESS_STUCK_INTERVAL_MS_DEFAULT = -1; @@ -980,7 +1275,7 @@ public TezConfiguration(boolean loadDefaults) { /** * Boolean value. Whether to reuse containers for non-local tasks. Active only if reuse is - * enabled. Turning this on can severely affect locality and can be bad for jobs with high data + * enabled. Turning this on can severely affect locality and can be bad for jobs with high data * volume being read from the primary data sources. */ @ConfigurationScope(Scope.AM) @@ -990,6 +1285,20 @@ public TezConfiguration(boolean loadDefaults) { public static final boolean TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED_DEFAULT = false; + /** + * Boolean value. Whether to reuse new containers that could not be immediately assigned to + * pending requests. If enabled then newly assigned containers that cannot be immediately + * allocated will be held for potential reuse as if it were a container that had just completed + * a task. If disabled then newly assigned containers that cannot be immediately allocated will + * be released. Active only if container reuse is enabled. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="boolean") + public static final String TEZ_AM_CONTAINER_REUSE_NEW_CONTAINERS_ENABLED = + TEZ_AM_PREFIX + "container.reuse.new-containers.enabled"; + public static final boolean + TEZ_AM_CONTAINER_REUSE_NEW_CONTAINERS_ENABLED_DEFAULT = false; + /** * Int value. The amount of time to wait before assigning a container to the next level * of locality. NODE -> RACK -> NON_LOCAL. Delay scheduling parameter. Expert level setting. @@ -1003,15 +1312,15 @@ public TezConfiguration(boolean loadDefaults) { TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS_DEFAULT = 250l; /** - * Int value. The minimum amount of time to hold on to a container that is idle. Only active when - * reuse is enabled. Set to -1 to never release idle containers (not recommended). + * Int value. The minimum amount of time to hold on to a container that is idle. Only active when + * reuse is enabled. Set to -1 to never release idle containers (not recommended). */ @ConfigurationScope(Scope.AM) @ConfigurationProperty(type="integer") public static final String TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS = TEZ_AM_PREFIX + "container.idle.release-timeout-min.millis"; public static final long - TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS_DEFAULT = 5000l; + TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS_DEFAULT = 5000l; /** * Int value. The maximum amount of time to hold on to a container if no task can be @@ -1020,7 +1329,7 @@ public TezConfiguration(boolean loadDefaults) { * TezConfiguration#TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS. * Containers will have an expire time set to a random value between * TezConfiguration#TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS && - * TezConfiguration#TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS. This + * TezConfiguration#TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS. 
This * creates a graceful reduction in the amount of idle resources held */ @ConfigurationScope(Scope.AM) @@ -1029,9 +1338,9 @@ public TezConfiguration(boolean loadDefaults) { TEZ_AM_PREFIX + "container.idle.release-timeout-max.millis"; public static final long TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS_DEFAULT = 10000l; - + /** - * Int value. The minimum number of containers that will be held in session mode. Not active in + * Int value. The minimum number of containers that will be held in session mode. Not active in * non-session mode. Enables an idle session (not running any DAG) to hold on to a minimum number * of containers to provide fast response times for the next DAG. */ @@ -1042,7 +1351,7 @@ public TezConfiguration(boolean loadDefaults) { public static final int TEZ_AM_SESSION_MIN_HELD_CONTAINERS_DEFAULT = 0; /** - * Boolean value. Allow/disable logging for all dags in a session + * Boolean value. Allow/disable logging for all dags in a session */ @Private @ConfigurationScope(Scope.AM) @@ -1075,7 +1384,7 @@ public TezConfiguration(boolean loadDefaults) { public static final float TEZ_VERTEX_FAILURES_MAXPERCENT_DEFAULT = 0.0f; /** * Int value. The number of RM heartbeats to wait after preempting running tasks before preempting - * more running tasks. After preempting a task, we need to wait at least 1 heartbeat so that the + * more running tasks. After preempting a task, we need to wait at least 1 heartbeat so that the * RM can act on the released resources and assign new ones to us. Expert level setting. */ @ConfigurationScope(Scope.AM) @@ -1086,8 +1395,8 @@ public TezConfiguration(boolean loadDefaults) { /** * Int value. Time (in millisecs) that an unsatisfied request will wait before preempting other - * resources. In rare cases, the cluster says there are enough free resources but does not end - * up getting enough on a node to actually assign it to the job. This configuration tries to put + * resources. In rare cases, the cluster says there are enough free resources but does not end + * up getting enough on a node to actually assign it to the job. This configuration tries to put * a deadline on such wait to prevent indefinite job hangs. */ @ConfigurationScope(Scope.AM) @@ -1125,7 +1434,7 @@ public TezConfiguration(boolean loadDefaults) { * * Specify additional user classpath information to be used for Tez AM and all containers. * This will be appended to the classpath after PWD - * + * * 'tez.lib.uris.classpath' defines the relative classpath into the archives * that are set in 'tez.lib.uris' * @@ -1151,7 +1460,7 @@ public TezConfiguration(boolean loadDefaults) { public static final String TEZ_AUX_URIS = TEZ_PREFIX + "aux.uris"; /** - * Boolean value. Allows to ignore 'tez.lib.uris'. Useful during development as well as + * Boolean value. Allows to ignore 'tez.lib.uris'. Useful during development as well as * raw Tez application where classpath is propagated with application * via {@link LocalResource}s. This is mainly useful for developer/debugger scenarios. */ @@ -1217,8 +1526,8 @@ public TezConfiguration(boolean loadDefaults) { /** * Int value. Time (in seconds) to wait for AM to come up when trying to submit a DAG - * from the client. Only relevant in session mode. If the cluster is busy and cannot launch the - * AM then this timeout may be hit. In those case, using non-session mode is recommended if + * from the client. Only relevant in session mode. If the cluster is busy and cannot launch the + * AM then this timeout may be hit. 
In those cases, using non-session mode is recommended if * applicable. Otherwise increase the timeout (set to -1 for infinity. Not recommended) */ @ConfigurationScope(Scope.AM) @@ -1389,7 +1698,7 @@ public TezConfiguration(boolean loadDefaults) { public static final int TEZ_HISTORY_LOGGING_TIMELINE_NUM_DAGS_PER_GROUP_DEFAULT = 1; /** - * String value. The directory into which history data will be written. This defaults to the + * String value. The directory into which history data will be written. This defaults to the * container logging directory. This is relevant only when SimpleHistoryLoggingService is being * used for {@link TezConfiguration#TEZ_HISTORY_LOGGING_SERVICE_CLASS} */ @@ -1408,6 +1717,59 @@ public TezConfiguration(boolean loadDefaults) { TEZ_PREFIX + "simple.history.max.errors"; public static final int TEZ_SIMPLE_HISTORY_LOGGING_MAX_ERRORS_DEFAULT = 10; + /** + * String value. The base directory into which history data will be written when proto history + * logging service is used for {@link TezConfiguration#TEZ_HISTORY_LOGGING_SERVICE_CLASS}. + * If this is not set, then logging is disabled for ProtoHistoryLoggingService. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_HISTORY_LOGGING_PROTO_BASE_DIR = + TEZ_PREFIX + "history.logging.proto-base-dir"; + + /** + * Long value. The amount of time in seconds to wait to ensure all events for a day are synced + * to disk. This should be the maximum time variation b/w machines + the maximum time to sync file + * content and metadata. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="long") + public static final String TEZ_HISTORY_LOGGING_PROTO_SYNC_WINDOWN_SECS = + TEZ_PREFIX + "history.logging.proto-sync-window-secs"; + public static final long TEZ_HISTORY_LOGGING_PROTO_SYNC_WINDOWN_SECS_DEFAULT = 60L; + + /** + * Int value. Maximum queue size for proto history event logger. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="integer") + public static final String TEZ_HISTORY_LOGGING_PROTO_QUEUE_SIZE = + TEZ_PREFIX + "history.logging.queue.size"; + public static final int TEZ_HISTORY_LOGGING_PROTO_QUEUE_SIZE_DEFAULT = 100000; + + + /** + * Boolean value. Set this to true, if the underlying file system does not support flush (Ex: s3). + * The dag submitted, initialized and started events are written into a file and closed. The rest + * of the events are written into another file. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="boolean") + public static final String TEZ_HISTORY_LOGGING_PROTO_SPLIT_DAG_START = + TEZ_PREFIX + "history.logging.split-dag-start"; + public static final boolean TEZ_HISTORY_LOGGING_PROTO_SPLIT_DAG_START_DEFAULT = false; + + /** + * Boolean value. Whether proto history logging operations should be performed as the + * DAG's owner (doAs) rather than as the process user. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="boolean") + public static final String TEZ_HISTORY_LOGGING_PROTO_DOAS = + TEZ_PREFIX + "history.logging.proto-doas"; + public static final boolean TEZ_HISTORY_LOGGING_PROTO_DOAS_DEFAULT = false; + /** * Int value. Time, in milliseconds, to wait while flushing YARN ATS data during shutdown. * Expert level setting. @@ -1466,7 +1828,7 @@ public TezConfiguration(boolean loadDefaults) { + "yarn.ats.acl.dag.domain.id"; /** - * Boolean value. Enable recovery of DAGs.
This allows a restarted app master to recover the + * Boolean value. Enable recovery of DAGs. This allows a restarted app master to recover the * incomplete DAGs from the previous instance of the app master. */ @ConfigurationScope(Scope.AM) @@ -1475,6 +1837,18 @@ public TezConfiguration(boolean loadDefaults) { TEZ_PREFIX + "dag.recovery.enabled"; public static final boolean DAG_RECOVERY_ENABLED_DEFAULT = true; + + /** + * Boolean value. When set, this enables AM to fail when DAG recovery is enabled and + * restarted app master did not find anything to recover + * Expert level setting. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="boolean") + public static final String TEZ_AM_FAILURE_ON_MISSING_RECOVERY_DATA = + TEZ_AM_PREFIX + "failure.on.missing.recovery.data"; + public static final boolean TEZ_AM_FAILURE_ON_MISSING_RECOVERY_DATA_DEFAULT = false; + /** * Int value. Size in bytes for the IO buffer size while processing the recovery file. * Expert level setting. @@ -1514,6 +1888,28 @@ public TezConfiguration(boolean loadDefaults) { public static final boolean TEZ_LOCAL_MODE_DEFAULT = false; + /** + * Boolean value. Enable local mode execution in Tez without using network for communicating with + * DAGAppMaster. This option only makes sense when {@link #TEZ_LOCAL_MODE} is true. When + * TEZ_LOCAL_MODE_WITHOUT_NETWORK is turned on, LocalClient will call DAGAppMaster's methods + * directly. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type = "boolean") + public static final String TEZ_LOCAL_MODE_WITHOUT_NETWORK = + TEZ_PREFIX + "local.mode.without.network"; + + public static final boolean TEZ_LOCAL_MODE_WITHOUT_NETWORK_DEFAULT = false; + + /** + * String value. TezLocalCacheManager uses this folder as a root for temp and localized files. + */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty + public static final String TEZ_LOCAL_CACHE_ROOT_FOLDER = TEZ_PREFIX + "local.cache.root.folder"; + + public static final String TEZ_LOCAL_CACHE_ROOT_FOLDER_DEFAULT = "."; + /** * Tez AM Inline Mode flag. Not valid till Tez-684 get checked-in */ @@ -1531,7 +1927,7 @@ public TezConfiguration(boolean loadDefaults) { /** * Int value. - * The maximium number of tasks running in parallel within the app master process. + * The maximum number of tasks running in parallel within the app master process. */ @ConfigurationScope(Scope.AM) @ConfigurationProperty(type="integer") @@ -1565,10 +1961,10 @@ public TezConfiguration(boolean loadDefaults) { public static final boolean TEZ_AM_ACLS_ENABLED_DEFAULT = true; /** - * String value. + * String value. * AM view ACLs. This allows the specified users/groups to view the status of the AM and all DAGs * that run within this AM. - * Comma separated list of users, followed by whitespace, followed by a comma separated list of + * Comma separated list of users, followed by whitespace, followed by a comma separated list of * groups */ @ConfigurationScope(Scope.AM) @@ -1579,7 +1975,7 @@ public TezConfiguration(boolean loadDefaults) { * String value. * AM modify ACLs. This allows the specified users/groups to run modify operations on the AM * such as submitting DAGs, pre-warming the session, killing DAGs or shutting down the session. 
- * Comma separated list of users, followed by whitespace, followed by a comma separated list of + * Comma separated list of users, followed by whitespace, followed by a comma separated list of * groups */ @ConfigurationScope(Scope.AM) @@ -1652,13 +2048,23 @@ public TezConfiguration(boolean loadDefaults) { + "tez-ui.webservice.enable"; public static final boolean TEZ_AM_WEBSERVICE_ENABLE_DEFAULT = true; + /** + * String value. Range of ports that the AM can use for the WebUIService. Leave blank + * to use all possible ports. Expert level setting. It's hadoop standard range configuration. + * For example 50000-50050,50100-50200 + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_AM_WEBSERVICE_PORT_RANGE = TEZ_AM_PREFIX + + "tez-ui.webservice.port-range"; + + public static final String TEZ_AM_WEBSERVICE_PORT_RANGE_DEFAULT = "50000-50050"; + // TODO only validate property here, value can also be validated if necessary public static void validateProperty(String property, Scope usedScope) { Scope validScope = PropertyScope.get(property); if (validScope == null) { - if (LOG.isDebugEnabled()) { - LOG.debug(property + " is not standard configuration property of tez, can not been validated"); - } + LOG.debug("{} is not a standard Tez configuration property, so it cannot be validated", property); } else { if (usedScope.ordinal() > validScope.ordinal()) { throw new IllegalStateException(property + " is set at the scope of " + usedScope @@ -1693,6 +2099,18 @@ static Set getPropertySet() { TEZ_PREFIX + "test.minicluster.app.wait.on.shutdown.secs"; public static final long TEZ_TEST_MINI_CLUSTER_APP_WAIT_ON_SHUTDOWN_SECS_DEFAULT = 30; + /** + * Long value + * Status cache timeout window in seconds for the DAGClient. + */ + @Private + @ConfigurationScope(Scope.CLIENT) + @ConfigurationProperty(type="long") + public static final String TEZ_CLIENT_DAG_STATUS_CACHE_TIMEOUT_SECS = TEZ_PREFIX + + "client.dag.status.cache.timeout-secs"; + // Default timeout is 60 seconds. + public static final long TEZ_CLIENT_DAG_STATUS_CACHE_TIMEOUT_SECS_DEFAULT = 60; + /** * Long value * Time to wait (in milliseconds) for yarn app's diagnotics is available @@ -1857,4 +2275,58 @@ static Set getPropertySet() { public static final String TEZ_SHARED_EXECUTOR_MAX_THREADS = "tez.shared-executor.max-threads"; public static final int TEZ_SHARED_EXECUTOR_MAX_THREADS_DEFAULT = -1; + /** + * Acquire all FileSystems info. e.g., all namenodes info of HDFS federation cluster. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_JOB_FS_SERVERS = "tez.job.fs-servers"; + + /** + * Skip delegation token renewal for specified FileSystems. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_JOB_FS_SERVERS_TOKEN_RENEWAL_EXCLUDE = "tez.job.fs-servers.token-renewal.exclude"; + + /** + * Comma-separated list of properties that MRReaderMapred should return (if present) when calling for config updates. + */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty + public static final String TEZ_MRREADER_CONFIG_UPDATE_PROPERTIES = "tez.mrreader.config.update.properties"; + + /** + * Frequency at which thread dump should be captured. Supports TimeUnits. This is effective only + * when org.apache.tez.dag.app.ThreadDumpDAGHook is configured to tez.am.hooks or + * org.apache.tez.runtime.task.ThreadDumpTaskAttemptHook is configured to tez.task.attempt.hooks.
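Editor's note: a usage sketch tying together the hook and interval properties introduced in this hunk (the hook class names are the ones given in the javadoc above).

    TezConfiguration conf = new TezConfiguration();
    conf.set(TezConfiguration.TEZ_AM_HOOKS, "org.apache.tez.dag.app.ThreadDumpDAGHook");
    conf.set(TezConfiguration.TEZ_TASK_ATTEMPT_HOOKS,
        "org.apache.tez.runtime.task.ThreadDumpTaskAttemptHook");
    conf.set(TezConfiguration.TEZ_THREAD_DUMP_INTERVAL, "250ms"); // supports TimeUnit suffixes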
+ */ + @ConfigurationScope(Scope.DAG) + @ConfigurationProperty + public static final String TEZ_THREAD_DUMP_INTERVAL = "tez.thread.dump.interval"; + public static final String TEZ_THREAD_DUMP_INTERVAL_DEFAULT = "100ms"; + + /** + * Limits the amount of data that can be written to LocalFileSystem by a Task. + */ + @ConfigurationScope(Scope.DAG) + @ConfigurationProperty(type = "long") + public static final String TEZ_TASK_LOCAL_FS_WRITE_LIMIT_BYTES = "tez.task.local-fs.write-limit.bytes"; + public static final long TEZ_TASK_LOCAL_FS_WRITE_LIMIT_BYTES_DEFAULT = -1; + + /** + * Comma-separated list of hook classes implementing org.apache.tez.runtime.hook.TezDAGHook. + * e.g. org.apache.tez.dag.app.ThreadDumpDAGHook + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_AM_HOOKS = TEZ_AM_PREFIX + "hooks"; + + /** + * Comma-separated list of hook classes implementing org.apache.tez.runtime.hook.TezTaskAttemptHook. + * e.g. org.apache.tez.runtime.task.ThreadDumpTaskAttemptHook + */ + @ConfigurationScope(Scope.DAG) + @ConfigurationProperty + public static final String TEZ_TASK_ATTEMPT_HOOKS = TEZ_TASK_PREFIX + "attempt.hooks"; } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfigurationConstants.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfigurationConstants.java index 33abc77de4..4bd5e254fb 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfigurationConstants.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfigurationConstants.java @@ -27,12 +27,14 @@ */ @ConfigurationClass(templateFileName = "tez-conf-constants.xml") @Private -public class TezConfigurationConstants { +public final class TezConfigurationConstants { static { TezConfiguration.setupConfigurationScope(TezConfigurationConstants.class); } + private TezConfigurationConstants() {} + /** * String value. Set automatically by the client. The host name of the client the Tez application * was submitted from. diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java index cf5ab11ec7..379eb0cb1b 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java @@ -24,7 +24,7 @@ * Specifies all constant values in Tez */ @Private -public class TezConstants { +public final class TezConstants { public static final String TEZ_APPLICATION_MASTER_CLASS = @@ -129,4 +129,5 @@ public static String getTezUberServicePluginName() { public static final double TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION_SMALL_SLAB = 0.7; public static final double TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION_LARGE_SLAB = 0.8; + private TezConstants() {} } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/UserPayload.java b/tez-api/src/main/java/org/apache/tez/dag/api/UserPayload.java index fa617b5317..087b17ab9d 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/UserPayload.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/UserPayload.java @@ -62,6 +62,17 @@ public static UserPayload create(@Nullable ByteBuffer payload, int version) { return new UserPayload(payload, version); } + /** + * Return the payload as a ByteBuffer. + * @return ByteBuffer. + */ + @Nullable + public ByteBuffer getRawPayload() { + // Note: Several bits of serialization, including deepCopyAsArray depend on a new instance of the + // ByteBuffer being returned, since they modify it. 
If changing this code to return the same + * ByteBuffer - deepCopyAsArray and TezEntityDescriptor need to be looked at. + return payload == EMPTY_BYTE ? null : payload.duplicate(); + } /** * Return the payload as a read-only ByteBuffer. * @return read-only ByteBuffer. */ diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/Vertex.java b/tez-api/src/main/java/org/apache/tez/dag/api/Vertex.java index bf3a59ba24..8ce3fb67e6 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/Vertex.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/Vertex.java @@ -23,6 +23,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; @@ -35,7 +36,7 @@ import org.apache.tez.dag.api.VertexGroup.GroupInfo; import org.apache.tez.runtime.api.LogicalIOProcessor; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -276,10 +277,10 @@ public Map getTaskLocalFiles() { * set environment for all vertices via Tezconfiguration#TEZ_TASK_LAUNCH_ENV * @param environment * @return this Vertex + * @throws NullPointerException if {@code environment} is {@code null} */ public Vertex setTaskEnvironment(Map environment) { - Preconditions.checkArgument(environment != null); - this.taskEnvironment.putAll(environment); + this.taskEnvironment.putAll(Objects.requireNonNull(environment)); return this; } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/VertexManagerPluginContext.java b/tez-api/src/main/java/org/apache/tez/dag/api/VertexManagerPluginContext.java index b858a6531a..0862f49034 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/VertexManagerPluginContext.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/VertexManagerPluginContext.java @@ -21,6 +21,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import javax.annotation.Nullable; @@ -31,9 +32,10 @@ import org.apache.tez.dag.api.event.VertexState; import org.apache.tez.runtime.api.InputSpecUpdate; import org.apache.tez.runtime.api.VertexStatistics; +import org.apache.tez.runtime.api.events.CustomProcessorEvent; import org.apache.tez.runtime.api.events.InputDataInformationEvent; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; /** * Object with API's to interact with the Tez execution engine @@ -70,8 +72,7 @@ public class TaskWithLocationHint { Integer taskIndex; TaskLocationHint locationHint; public TaskWithLocationHint(Integer taskIndex, @Nullable TaskLocationHint locationHint) { - Preconditions.checkState(taskIndex != null); - this.taskIndex = taskIndex; + this.taskIndex = Objects.requireNonNull(taskIndex); this.locationHint = locationHint; } @@ -268,6 +269,17 @@ public void reconfigureVertex(@Nullable Map rootInputSp * task to which events need to be sent.
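Editor's note on the recurring null-check modernization in these hunks: Preconditions.checkNotNull and Objects.requireNonNull both throw NullPointerException and return their argument, so that swap is drop-in. Replacing Preconditions.checkArgument(x != null), as in setTaskEnvironment above, changes the thrown type from IllegalArgumentException to NullPointerException, which is why the javadoc now documents a NullPointerException.

    // Drop-in: both throw NullPointerException and return the argument.
    this.userPayload = Objects.requireNonNull(userPayload);
    // Not drop-in: checkArgument(x != null) threw IllegalArgumentException before.
    this.taskEnvironment.putAll(Objects.requireNonNull(environment));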
*/ public void addRootInputEvents(String inputName, Collection events); + + /** + * Allows a VertexManagerPlugin to send events with a custom payload to the processor + * of a specific task of the managed vertex + * + * It's up to the user to make sure the taskId is valid + * + * @param events events to be sent + * @param taskId id of a task of the managed vertex + */ + public void sendEventToProcessor(Collection events, int taskId); @Deprecated /** diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClient.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClient.java index c70da75c9e..944bff3fbd 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClient.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClient.java @@ -103,7 +103,7 @@ public abstract VertexStatus getVertexStatus(String vertexName, public abstract void tryKillDAG() throws IOException, TezException; /** - * Wait for DAG to complete without printing any vertex statuses + * Wait forever for DAG to complete without printing any vertex statuses * * @return Final DAG Status * @throws IOException @@ -112,6 +112,20 @@ public abstract VertexStatus getVertexStatus(String vertexName, */ public abstract DAGStatus waitForCompletion() throws IOException, TezException, InterruptedException; + /** + * Wait for DAG to complete without printing any vertex statuses + * + * @param timeMs Maximum wait duration in milliseconds + * @return Final DAG Status, or null on timeout or if DAG is no longer running + * @throws IOException + * @throws TezException + * @throws InterruptedException + */ + public DAGStatus waitForCompletion(long timeMs) throws IOException, TezException, InterruptedException { + // Make non-abstract to avoid compat issues in Hive. + throw new UnsupportedOperationException(); + } + /** * Wait for DAG to complete and periodically print *all* vertices' status. * @@ -125,4 +139,14 @@ public abstract VertexStatus getVertexStatus(String vertexName, */ public abstract DAGStatus waitForCompletionWithStatusUpdates(@Nullable Set statusGetOpts) throws IOException, TezException, InterruptedException; + + /** + * Returns the Tez AM's web ui address if any.
+ * + * @return The http web UI address + * @throws IOException + * @throws TezException + */ + public abstract String getWebUIAddress() throws IOException, TezException; + } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java index 1cf0bfcd82..2913d08c41 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImpl.java @@ -19,6 +19,7 @@ package org.apache.tez.dag.api.client; import javax.annotation.Nullable; + import java.io.IOException; import java.text.DecimalFormat; import java.util.Collections; @@ -26,11 +27,13 @@ import java.util.HashMap; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.tez.common.CachedEntity; +import org.apache.tez.common.Preconditions; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,11 +42,11 @@ import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.YarnApplicationState; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.tez.client.FrameworkClient; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.DAGNotRunningException; +import org.apache.tez.dag.api.NoCurrentDAGException; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.api.TezUncheckedException; @@ -58,13 +61,15 @@ public class DAGClientImpl extends DAGClient { private final String dagId; private final TezConfiguration conf; private final FrameworkClient frameworkClient; - + /** + * Container to cache the last {@link DAGStatus}. 
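A usage sketch for the timed wait declared above (not part of the patch itself): client stands for an already-connected DAGClient, and per the new contract a null return means the wait timed out or the DAG is no longer running.

    // Wait up to two minutes for the DAG to finish.
    DAGStatus status = client.waitForCompletion(2 * 60 * 1000L);
    if (status == null) {
      System.err.println("Timed out, or the DAG is no longer running");
    } else if (status.getState() == DAGStatus.State.SUCCEEDED) {
      System.out.println("DAG succeeded");
    } else {
      System.out.println("DAG ended in state " + status.getState()
          + ": " + status.getDiagnostics());
    }

The same client can also request the memory figures surfaced later in this patch (StatusGetOpts.GET_MEMORY_USAGE plus getMemoryUsedByAM()/getMemoryUsedByTasks() on DAGStatus):

    DAGStatus s = client.getDAGStatus(
        EnumSet.of(StatusGetOpts.GET_COUNTERS, StatusGetOpts.GET_MEMORY_USAGE));
    System.out.println("AM memory: " + s.getMemoryUsedByAM()
        + ", task memory: " + s.getMemoryUsedByTasks());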
+ */ + private final CachedEntity cachedDAGStatusRef; @VisibleForTesting protected DAGClientInternal realClient; - private boolean dagCompleted = false; + private volatile boolean dagCompleted = false; @VisibleForTesting protected boolean isATSEnabled = false; - private DAGStatus cachedDagStatus = null; Map cachedVertexStatus = new HashMap(); private static final long SLEEP_FOR_COMPLETION = 500; @@ -79,7 +84,7 @@ public class DAGClientImpl extends DAGClient { private boolean cleanupFrameworkClient; public DAGClientImpl(ApplicationId appId, String dagId, TezConfiguration conf, - YarnConfiguration yarnConf, @Nullable FrameworkClient frameworkClient) { + @Nullable FrameworkClient frameworkClient, UserGroupInformation ugi) { this.appId = appId; this.dagId = dagId; this.conf = conf; @@ -87,7 +92,7 @@ public DAGClientImpl(ApplicationId appId, String dagId, TezConfiguration conf, this.frameworkClient = frameworkClient; } else { this.frameworkClient = FrameworkClient.createFrameworkClient(conf); - this.frameworkClient.init(conf, yarnConf); + this.frameworkClient.init(conf); this.frameworkClient.start(); cleanupFrameworkClient = true; } @@ -99,7 +104,7 @@ public DAGClientImpl(ApplicationId appId, String dagId, TezConfiguration conf, TezConfiguration.TEZ_AM_HISTORY_LOGGING_ENABLED_DEFAULT) && DAGClientTimelineImpl.isSupported(); - realClient = new DAGClientRPCImpl(appId, dagId, conf, this.frameworkClient); + realClient = new DAGClientRPCImpl(appId, dagId, conf, this.frameworkClient, ugi); statusPollInterval = conf.getLong( TezConfiguration.TEZ_DAG_STATUS_POLLINTERVAL_MS, TezConfiguration.TEZ_DAG_STATUS_POLLINTERVAL_MS_DEFAULT); @@ -110,6 +115,28 @@ public DAGClientImpl(ApplicationId appId, String dagId, TezConfiguration conf, this.diagnoticsWaitTimeout = conf.getLong( TezConfiguration.TEZ_CLIENT_DIAGNOSTICS_WAIT_TIMEOUT_MS, TezConfiguration.TEZ_CLIENT_DIAGNOSTICS_WAIT_TIMEOUT_MS_DEFAULT); + cachedDAGStatusRef = initCacheDAGRefFromConf(conf); + } + + /** + * Constructs a new {@link CachedEntity} for {@link DAGStatus}. + * @param tezConf TEZ configuration parameters. + * @return a caching entry to hold the {@link DAGStatus}. + */ + protected CachedEntity initCacheDAGRefFromConf(TezConfiguration tezConf) { + long clientDAGStatusCacheTimeOut = tezConf.getLong( + TezConfiguration.TEZ_CLIENT_DAG_STATUS_CACHE_TIMEOUT_SECS, + TezConfiguration.TEZ_CLIENT_DAG_STATUS_CACHE_TIMEOUT_SECS_DEFAULT); + if (clientDAGStatusCacheTimeOut <= 0) { + LOG.error("DAG Status cache timeout interval should be positive. Enforcing default value."); + clientDAGStatusCacheTimeOut = + TezConfiguration.TEZ_CLIENT_DAG_STATUS_CACHE_TIMEOUT_SECS_DEFAULT; + } + return new CachedEntity<>(TimeUnit.SECONDS, clientDAGStatusCacheTimeOut); + } + + protected CachedEntity getCachedDAGStatusRef() { + return cachedDAGStatusRef; } @Override @@ -133,13 +160,11 @@ public DAGStatus getDAGStatus(@Nullable Set statusOptions, } long startTime = System.currentTimeMillis(); - boolean refreshStatus; - DAGStatus dagStatus; - if(cachedDagStatus != null) { - dagStatus = cachedDagStatus; - refreshStatus = true; - } else { - // For the first lookup only. After this cachedDagStatus should be populated. 
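CachedEntity (org.apache.tez.common) is the expiring-value holder the rewritten caching logic relies on: getValue() yields null once the entry times out, and setValue() refreshes it. As a rough self-contained sketch of that contract — the field names and nanoTime bookkeeping here are assumptions, not the real Tez implementation:

    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.atomic.AtomicReference;

    // Minimal sketch of an expiring cache entry with CachedEntity-like behavior.
    public class ExpiringRef<T> {
      private final long timeoutNanos;                      // validity window
      private final AtomicReference<T> ref = new AtomicReference<>();
      private volatile long writtenAtNanos;                 // last refresh time

      public ExpiringRef(TimeUnit unit, long timeout) {
        this.timeoutNanos = unit.toNanos(timeout);
      }

      // Returns the cached value, or null if nothing was set or the entry
      // expired; callers like getDAGStatus() then fall back to a fresh lookup.
      public T getValue() {
        T value = ref.get();
        if (value == null || System.nanoTime() - writtenAtNanos > timeoutNanos) {
          return null;
        }
        return value;
      }

      // Stores a fresh value and restarts the expiry window.
      public void setValue(T value) {
        writtenAtNanos = System.nanoTime();
        ref.set(value);
      }
    }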
+ + DAGStatus dagStatus = cachedDAGStatusRef.getValue(); + boolean refreshStatus = true; + if (dagStatus == null) { + // the first lookup only or when the cachedDAG has expired dagStatus = getDAGStatus(statusOptions); refreshStatus = false; } @@ -209,7 +234,7 @@ public DAGStatus getDAGStatus(@Nullable Set statusOptions, } } - private DAGStatus getDAGStatusInternal(@Nullable Set statusOptions, + protected DAGStatus getDAGStatusInternal(@Nullable Set statusOptions, long timeout) throws TezException, IOException { if (!dagCompleted) { @@ -221,13 +246,14 @@ private DAGStatus getDAGStatusInternal(@Nullable Set statusOption final DAGStatus dagStatus = getDAGStatusViaAM(statusOptions, timeout); if (!dagCompleted) { - if (dagStatus != null) { - cachedDagStatus = dagStatus; + if (dagStatus != null) { // update the cached DAGStatus + cachedDAGStatusRef.setValue(dagStatus); return dagStatus; } - if (cachedDagStatus != null) { + DAGStatus cachedDAG = cachedDAGStatusRef.getValue(); + if (cachedDAG != null) { // could not get from AM (not reachable/ was killed). return cached status. - return cachedDagStatus; + return cachedDAG; } } @@ -247,16 +273,17 @@ private DAGStatus getDAGStatusInternal(@Nullable Set statusOption LOG.info("Failed to fetch DAG data for completed DAG from YARN Timeline" + " - Application not found by YARN", e); } catch (TezException e) { - if (LOG.isDebugEnabled()) { - LOG.info("DAGStatus fetch failed." + e.getMessage()); - } + LOG.debug("DAGStatus fetch failed", e); } } // dag completed and Timeline service is either not enabled or does not have completion status // return cached status if completion info is present. - if (dagCompleted && cachedDagStatus != null && cachedDagStatus.isCompleted()) { - return cachedDagStatus; + if (dagCompleted) { + DAGStatus cachedDag = cachedDAGStatusRef.getValue(); + if (cachedDag != null && cachedDag.isCompleted()) { + return cachedDag; + } } // everything else fails rely on RM. @@ -270,9 +297,13 @@ public DAGStatus getDAGStatus(@Nullable Set statusOptions) throws } @Override - public VertexStatus getVertexStatus(String vertexName, Set statusOptions) throws - IOException, TezException { + public VertexStatus getVertexStatus(String vertexName, Set statusOptions) + throws IOException, TezException { + return getVertexStatusInternal(statusOptions, vertexName); + } + protected VertexStatus getVertexStatusInternal(Set statusOptions, String vertexName) + throws IOException, TezException { if (!dagCompleted) { VertexStatus vertexStatus = getVertexStatusViaAM(vertexName, statusOptions); @@ -302,9 +333,7 @@ public VertexStatus getVertexStatus(String vertexName, Set status + " - Application not found by YARN", e); return null; } catch (TezException e) { - if (LOG.isDebugEnabled()) { - LOG.debug("ERROR fetching vertex data from Yarn Timeline. 
" + e.getMessage()); - } + LOG.debug("ERROR fetching vertex data from Yarn Timeline", e); } } @@ -337,16 +366,21 @@ public void tryKillDAG() throws IOException, TezException { } } + @Override + public DAGStatus waitForCompletion(long timeMs) throws IOException, TezException, InterruptedException { + return _waitForCompletionWithStatusUpdates(timeMs, false, EnumSet.noneOf(StatusGetOpts.class)); + } + @Override public DAGStatus waitForCompletion() throws IOException, TezException, InterruptedException { - return _waitForCompletionWithStatusUpdates(false, EnumSet.noneOf(StatusGetOpts.class)); + return _waitForCompletionWithStatusUpdates(-1, false, EnumSet.noneOf(StatusGetOpts.class)); } @Override public DAGStatus waitForCompletionWithStatusUpdates( @Nullable Set statusGetOpts) throws IOException, TezException, InterruptedException { - return _waitForCompletionWithStatusUpdates(true, statusGetOpts); + return _waitForCompletionWithStatusUpdates(-1, true, statusGetOpts); } @Override @@ -375,10 +409,21 @@ private DAGStatus getDAGStatusViaAM(@Nullable Set statusOptions, } catch (ApplicationNotFoundException e) { LOG.info("DAG is no longer running - application not found by YARN", e); dagCompleted = true; + } catch (NoCurrentDAGException e) { + if (conf.getBoolean(TezConfiguration.DAG_RECOVERY_ENABLED, + TezConfiguration.DAG_RECOVERY_ENABLED_DEFAULT)) { + LOG.info("Got NoCurrentDAGException from AM, going on as recovery is enabled", e); + } else { + // if recovery is disabled, we're not expecting the DAG to be finished any time in the future + LOG.info("Got NoCurrentDAGException from AM, returning a failed DAG as recovery is disabled", e); + return dagLost(); + } } catch (TezException e) { - // can be either due to a n/w issue of due to AM completed. + // can be either due to a n/w issue or due to AM completed. + LOG.info("Cannot retrieve DAG Status due to TezException: {}", e.getMessage()); } catch (IOException e) { - // can be either due to a n/w issue of due to AM completed. + // can be either due to a n/w issue or due to AM completed. + LOG.info("Cannot retrieve DAG Status due to IOException: {}", e.getMessage()); } if (dagStatus == null && !dagCompleted) { @@ -388,6 +433,14 @@ private DAGStatus getDAGStatusViaAM(@Nullable Set statusOptions, return dagStatus; } + private DAGStatus dagLost() { + DAGProtos.DAGStatusProto.Builder builder = DAGProtos.DAGStatusProto.newBuilder(); + DAGStatus dagStatus = new DAGStatus(builder, DagStatusSource.AM); + builder.setState(DAGProtos.DAGStatusStateProto.DAG_FAILED); + builder.addAllDiagnostics(Collections.singleton(NoCurrentDAGException.MESSAGE_PREFIX)); + return dagStatus; + } + private VertexStatus getVertexStatusViaAM(String vertexName, Set statusOptions) throws IOException { VertexStatus vertexStatus = null; @@ -400,9 +453,11 @@ private VertexStatus getVertexStatusViaAM(String vertexName, Set LOG.info("DAG is no longer running - application not found by YARN", e); dagCompleted = true; } catch (TezException e) { - // can be either due to a n/w issue of due to AM completed. + // can be either due to a n/w issue or due to AM completed. + LOG.info("Cannot retrieve Vertex Status due to TezException: {}", e.getMessage()); } catch (IOException e) { // can be either due to a n/w issue of due to AM completed. 
+ LOG.info("Cannot retrieve Vertex Status due to IOException: {}", e.getMessage()); } if (vertexStatus == null && !dagCompleted) { @@ -420,12 +475,11 @@ private VertexStatus getVertexStatusViaAM(String vertexName, Set */ @VisibleForTesting protected DAGStatus getDAGStatusViaRM() throws TezException, IOException { - if(LOG.isDebugEnabled()) { - LOG.debug("GetDAGStatus via AM for app: " + appId + " dag:" + dagId); - } + LOG.debug("Get DAG status via framework client for app: {} dag: {}", appId, dagId); ApplicationReport appReport; try { appReport = frameworkClient.getApplicationReport(appId); + LOG.debug("Got appReport from framework client: {}", appReport); } catch (ApplicationNotFoundException e) { LOG.info("DAG is no longer running - application not found by YARN", e); throw new DAGNotRunningException(e); @@ -504,15 +558,21 @@ protected DAGStatus getDAGStatusViaRM() throws TezException, IOException { return dagStatus; } - private DAGStatus _waitForCompletionWithStatusUpdates(boolean vertexUpdates, + private DAGStatus _waitForCompletionWithStatusUpdates(long timeMs, + boolean vertexUpdates, @Nullable Set statusGetOpts) throws IOException, TezException, InterruptedException { DAGStatus dagStatus; boolean initPrinted = false; boolean runningPrinted = false; double dagProgress = -1.0; // Print the first one // monitoring + Long maxNs = timeMs >= 0 ? (System.nanoTime() + (timeMs * 1000000L)) : null; while (true) { - dagStatus = getDAGStatus(statusGetOpts, SLEEP_FOR_COMPLETION); + try { + dagStatus = getDAGStatus(statusGetOpts, SLEEP_FOR_COMPLETION); + } catch (DAGNotRunningException ex) { + return null; + } if (!initPrinted && (dagStatus.getState() == DAGStatus.State.INITING || dagStatus.getState() == DAGStatus.State.SUBMITTED)) { initPrinted = true; // Print once @@ -525,6 +585,9 @@ private DAGStatus _waitForCompletionWithStatusUpdates(boolean vertexUpdates, || dagStatus.getState() == DAGStatus.State.ERROR) { break; } + if (maxNs != null && System.nanoTime() > maxNs) { + return null; + } }// End of while(true) Set vertexNames = Collections.emptySet(); @@ -537,7 +600,14 @@ private DAGStatus _waitForCompletionWithStatusUpdates(boolean vertexUpdates, vertexNames = getDAGStatus(statusGetOpts).getVertexProgress().keySet(); } dagProgress = monitorProgress(vertexNames, dagProgress, null, dagStatus); - dagStatus = getDAGStatus(statusGetOpts, SLEEP_FOR_COMPLETION); + try { + dagStatus = getDAGStatus(statusGetOpts, SLEEP_FOR_COMPLETION); + } catch (DAGNotRunningException ex) { + return null; + } + if (maxNs != null && System.nanoTime() > maxNs) { + return null; + } }// end of while // Always print the last status irrespective of progress change monitorProgress(vertexNames, -1.0, statusGetOpts, dagStatus); @@ -617,9 +687,7 @@ private void switchToTimelineClient() throws IOException, TezException { realClient.close(); realClient = new DAGClientTimelineImpl(appId, dagId, conf, frameworkClient, (int) (2 * PRINT_STATUS_INTERVAL_MILLIS)); - if (LOG.isDebugEnabled()) { - LOG.debug("dag completed switching to DAGClientTimelineImpl"); - } + LOG.debug("dag completed switching to DAGClientTimelineImpl"); } @VisibleForTesting @@ -627,6 +695,11 @@ public DAGClientInternal getRealClient() { return realClient; } + @Override + public String getWebUIAddress() throws IOException, TezException { + return realClient.getWebUIAddress(); + } + private double getProgress(Progress progress) { return (progress.getTotalTaskCount() == 0 ? 
0.0 : (double) (progress.getSucceededTaskCount()) / progress.getTotalTaskCount()); diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImplLocal.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImplLocal.java new file mode 100644 index 0000000000..851bb687a1 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientImplLocal.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.dag.api.client; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import java.util.function.BiFunction; + +import javax.annotation.Nullable; + +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.tez.client.FrameworkClient; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.api.TezException; + +/** + * A DAGClientImpl which is typically used for tez.local.mode.without.network=true. + */ +public class DAGClientImplLocal extends DAGClientImpl { + + private BiFunction, Long, DAGStatus> dagStatusFunction; + private BiFunction, String, VertexStatus> vertexStatusFunction; + + public DAGClientImplLocal(ApplicationId appId, String dagId, TezConfiguration conf, + FrameworkClient frameworkClient, UserGroupInformation ugi, + BiFunction, Long, DAGStatus> dagStatusFunction, + BiFunction, String, VertexStatus> vertexStatusFunction) { + super(appId, dagId, conf, frameworkClient, ugi); + this.dagStatusFunction = dagStatusFunction; + this.vertexStatusFunction = vertexStatusFunction; + } + + @Override + protected DAGStatus getDAGStatusInternal(@Nullable Set statusOptions, long timeout) + throws TezException, IOException { + return dagStatusFunction.apply(statusOptions == null ? new HashSet<>() : statusOptions, + timeout); + } + + @Override + protected VertexStatus getVertexStatusInternal(@Nullable Set statusOptions, String vertexName) + throws TezException, IOException { + return vertexStatusFunction.apply(statusOptions == null ? 
new HashSet<>() : statusOptions, vertexName); + } +} diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientInternal.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientInternal.java index a3c898a855..8346d53da7 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientInternal.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientInternal.java @@ -125,4 +125,6 @@ public abstract VertexStatus getVertexStatus(String vertexName, */ public abstract DAGStatus waitForCompletionWithStatusUpdates(@Nullable Set statusGetOpts) throws IOException, TezException, InterruptedException; + + public abstract String getWebUIAddress() throws IOException, TezException; } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientTimelineImpl.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientTimelineImpl.java index d34dbf008d..4ec9c94354 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientTimelineImpl.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGClientTimelineImpl.java @@ -213,6 +213,9 @@ public void close() throws IOException { httpClient.destroy(); httpClient = null; } + if (timelineReaderStrategy != null) { + timelineReaderStrategy.close(); + } } private DAGStatusProto.Builder parseDagStatus(JSONObject jsonRoot, Set statusOptions) @@ -520,4 +523,9 @@ public DAGStatus getDAGStatus(@Nullable Set statusOptions, return getDAGStatus(statusOptions); } + @Override + public String getWebUIAddress() throws IOException, TezException { + throw new TezException("DAGClientTimelineImpl.getWebUIAddress is not supported"); + } + } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGStatus.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGStatus.java index cbf641e00f..1f8db62b80 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGStatus.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/DAGStatus.java @@ -152,6 +152,14 @@ public TezCounters getDAGCounters() { return dagCounters; } + public long getMemoryUsedByAM() { + return proxy.getMemoryUsedByAM(); + } + + public long getMemoryUsedByTasks() { + return proxy.getMemoryUsedByTasks(); + } + @InterfaceAudience.Private DagStatusSource getSource() { return this.source; @@ -201,12 +209,12 @@ public int hashCode() { @Override public String toString() { StringBuilder sb = new StringBuilder(); - sb.append("status=" + getState() - + ", progress=" + getDAGProgress() - + ", diagnostics=" - + StringUtils.join(getDiagnostics(), LINE_SEPARATOR) - + ", counters=" - + (getDAGCounters() == null ? "null" : getDAGCounters().toString())); + sb.append("status=" + getState()); + sb.append(", progress=" + getDAGProgress()); + sb.append(", diagnostics=" + StringUtils.join(getDiagnostics(), LINE_SEPARATOR)); + sb.append(", memoryUsedByAM=").append(proxy.getMemoryUsedByAM()); + sb.append(", memoryUsedByTasks=").append(proxy.getMemoryUsedByTasks()); + sb.append(", counters=" + (getDAGCounters() == null ? 
"null" : getDAGCounters().toString())); return sb.toString(); } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/Progress.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/Progress.java index 110ac90d8d..656838dc5a 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/client/Progress.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/Progress.java @@ -63,6 +63,10 @@ public int getKilledTaskAttemptCount() { return proxy.getKilledTaskAttemptCount(); } + public int getRejectedTaskAttemptCount() { + return proxy.getRejectedTaskAttemptCount(); + } + @Override public boolean equals(Object obj) { if (obj instanceof Progress){ @@ -73,7 +77,8 @@ && getRunningTaskCount() == other.getRunningTaskCount() && getFailedTaskCount() == other.getFailedTaskCount() && getKilledTaskCount() == other.getKilledTaskCount() && getFailedTaskAttemptCount() == other.getFailedTaskAttemptCount() - && getKilledTaskAttemptCount() == other.getKilledTaskAttemptCount(); + && getKilledTaskAttemptCount() == other.getKilledTaskAttemptCount() + && getRejectedTaskAttemptCount() == other.getRejectedTaskAttemptCount(); } return false; } @@ -94,6 +99,8 @@ public int hashCode() { getFailedTaskAttemptCount(); result = prime * result + getKilledTaskAttemptCount(); + result = prime * result + + getRejectedTaskAttemptCount(); return result; } @@ -119,6 +126,10 @@ public String toString() { sb.append(" KilledTaskAttempts: "); sb.append(getKilledTaskAttemptCount()); } + if (getRejectedTaskAttemptCount() > 0) { + sb.append(" RejectedTaskAttempts: "); + sb.append(getRejectedTaskAttemptCount()); + } return sb.toString(); } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/StatusGetOpts.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/StatusGetOpts.java index 1a9df7afa1..3518d33c2c 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/client/StatusGetOpts.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/StatusGetOpts.java @@ -29,5 +29,6 @@ @Evolving public enum StatusGetOpts { /** Retrieve Counters with Status */ - GET_COUNTERS + GET_COUNTERS, + GET_MEMORY_USAGE } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/TimelineReaderFactory.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/TimelineReaderFactory.java index c0569dda98..fec9191dec 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/client/TimelineReaderFactory.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/TimelineReaderFactory.java @@ -54,14 +54,14 @@ * create a httpclient, configured for the appropriate runtime. * * on hadoop 2.6+ the factory returns TimelineReaderTokenAuthenticatedStrategy, which supports - * kerberos based auth (secure cluster) or psuedo auth (un-secure cluster). + * kerberos based auth (secure cluster) or pseudo auth (un-secure cluster). * * on hadoop 2.4 where the token delegation auth is not supported, TimelineReaderPseudoAuthenticatedStrategy * is used which supports only unsecure timeline. 
* */ @InterfaceAudience.Private -public class TimelineReaderFactory { +public final class TimelineReaderFactory { private static final Logger LOG = LoggerFactory.getLogger(TimelineReaderFactory.class); @@ -79,6 +79,8 @@ public class TimelineReaderFactory { private static Class delegationTokenAuthenticatorClazz = null; private static Method delegationTokenAuthenticateURLOpenConnectionMethod = null; + private TimelineReaderFactory() {} + public static TimelineReaderStrategy getTimelineReaderStrategy(Configuration conf, boolean useHttps, int connTimeout) throws TezException { @@ -133,23 +135,24 @@ public static boolean isTimelineClientSupported() { public interface TimelineReaderStrategy { Client getHttpClient() throws IOException; + void close(); } /* * auth strategy for secured and unsecured environment with delegation token (hadoop 2.6 and above) */ private static class TimelineReaderTokenAuthenticatedStrategy implements TimelineReaderStrategy { - private final Configuration conf; private final boolean useHttps; private final int connTimeout; + private final SSLFactory sslFactory; public TimelineReaderTokenAuthenticatedStrategy(final Configuration conf, final boolean useHttps, final int connTimeout) { - this.conf = conf; this.useHttps = useHttps; this.connTimeout = connTimeout; + this.sslFactory = useHttps ? new SSLFactory(CLIENT, conf) : null; } @Override @@ -157,11 +160,10 @@ public Client getHttpClient() throws IOException { Authenticator authenticator; UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); UserGroupInformation realUgi = ugi.getRealUser(); - UserGroupInformation authUgi; String doAsUser; ClientConfig clientConfig = new DefaultClientConfig(JSONRootElementProvider.App.class); - ConnectionConfigurator connectionConfigurator = getNewConnectionConf(conf, useHttps, - connTimeout); + ConnectionConfigurator connectionConfigurator = getNewConnectionConf(useHttps, + connTimeout, sslFactory); try { authenticator = getTokenAuthenticator(); @@ -171,17 +173,15 @@ public Client getHttpClient() throws IOException { } if (realUgi != null) { - authUgi = realUgi; doAsUser = ugi.getShortUserName(); } else { - authUgi = ugi; doAsUser = null; } HttpURLConnectionFactory connectionFactory; try { connectionFactory = new TokenAuthenticatedURLConnectionFactory(connectionConfigurator, authenticator, - authUgi, doAsUser); + doAsUser); } catch (TezException e) { throw new IOException("Fail to create TokenAuthenticatedURLConnectionFactory", e); } @@ -204,17 +204,14 @@ private static class TokenAuthenticatedURLConnectionFactory implements HttpURLCo private final Authenticator authenticator; private final ConnectionConfigurator connConfigurator; - private final UserGroupInformation authUgi; private final String doAsUser; private final AuthenticatedURL.Token token; public TokenAuthenticatedURLConnectionFactory(ConnectionConfigurator connConfigurator, Authenticator authenticator, - UserGroupInformation authUgi, String doAsUser) throws TezException { this.connConfigurator = connConfigurator; this.authenticator = authenticator; - this.authUgi = authUgi; this.doAsUser = doAsUser; this.token = ReflectionUtils.createClazzInstance( DELEGATION_TOKEN_AUTHENTICATED_URL_TOKEN_CLASS_NAME, null, null); @@ -238,6 +235,13 @@ public HttpURLConnection getHttpURLConnection(URL url) throws IOException { } } } + + @Override + public void close() { + if (sslFactory != null) { + sslFactory.destroy(); + } + } } /* @@ -247,19 +251,20 @@ public HttpURLConnection getHttpURLConnection(URL url) throws IOException { 
protected static class TimelineReaderPseudoAuthenticatedStrategy implements TimelineReaderStrategy { private final ConnectionConfigurator connectionConf; + private final SSLFactory sslFactory; public TimelineReaderPseudoAuthenticatedStrategy(final Configuration conf, final boolean useHttps, final int connTimeout) { - connectionConf = getNewConnectionConf(conf, useHttps, connTimeout); + sslFactory = useHttps ? new SSLFactory(CLIENT, conf) : null; + connectionConf = getNewConnectionConf(useHttps, connTimeout, sslFactory); } @Override public Client getHttpClient() { ClientConfig config = new DefaultClientConfig(JSONRootElementProvider.App.class); HttpURLConnectionFactory urlFactory = new PseudoAuthenticatedURLConnectionFactory(connectionConf); - Client httpClient = new Client(new URLConnectionClientHandler(urlFactory), config); - return httpClient; + return new Client(new URLConnectionClientHandler(urlFactory), config); } @VisibleForTesting @@ -276,33 +281,38 @@ public HttpURLConnection getHttpURLConnection(URL url) throws IOException { URLEncoder.encode(UserGroupInformation.getCurrentUser().getShortUserName(), "UTF8"); HttpURLConnection httpURLConnection = - (HttpURLConnection) (new URL(url.toString() + tokenString)).openConnection(); + (HttpURLConnection) (new URL(url + tokenString)).openConnection(); this.connectionConf.configure(httpURLConnection); return httpURLConnection; } } + + @Override + public void close() { + if (sslFactory != null) { + sslFactory.destroy(); + } + } } - private static ConnectionConfigurator getNewConnectionConf(final Configuration conf, - final boolean useHttps, - final int connTimeout) { + private static ConnectionConfigurator getNewConnectionConf(final boolean useHttps, + final int connTimeout, + final SSLFactory sslFactory) { ConnectionConfigurator connectionConf = null; if (useHttps) { try { - connectionConf = getNewSSLConnectionConf(conf, connTimeout); + connectionConf = getNewSSLConnectionConf(connTimeout, sslFactory); } catch (IOException e) { - if (LOG.isDebugEnabled()) { - LOG.debug("Cannot load customized ssl related configuration." - + " Falling back to system-generic settings.", e); - } + LOG.debug("Cannot load customized ssl related configuration." 
+ + " Falling back to system-generic settings.", e); } } if (connectionConf == null) { connectionConf = new ConnectionConfigurator() { @Override - public HttpURLConnection configure(HttpURLConnection httpURLConnection) throws IOException { + public HttpURLConnection configure(HttpURLConnection httpURLConnection) { setTimeouts(httpURLConnection, connTimeout); return httpURLConnection; } @@ -312,14 +322,12 @@ public HttpURLConnection configure(HttpURLConnection httpURLConnection) throws I return connectionConf; } - private static ConnectionConfigurator getNewSSLConnectionConf(final Configuration conf, - final int connTimeout) + private static ConnectionConfigurator getNewSSLConnectionConf(final int connTimeout, + final SSLFactory sslFactory) throws IOException { - final SSLFactory sslFactory; final SSLSocketFactory sslSocketFactory; final HostnameVerifier hostnameVerifier; - sslFactory = new SSLFactory(CLIENT, conf); try { sslFactory.init(); sslSocketFactory = sslFactory.createSSLSocketFactory(); diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/VertexStatus.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/VertexStatus.java index dfb9bbe8cd..9efb12d612 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/client/VertexStatus.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/VertexStatus.java @@ -59,6 +59,10 @@ public VertexStatus(VertexStatusProtoOrBuilder proxy) { this.proxy = proxy; } + public String getId() { + return proxy.getId(); + } + public State getState() { return getState(proxy.getState()); } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/client/rpc/DAGClientRPCImpl.java b/tez-api/src/main/java/org/apache/tez/dag/api/client/rpc/DAGClientRPCImpl.java index 02935dfd20..798160a4ab 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/client/rpc/DAGClientRPCImpl.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/client/rpc/DAGClientRPCImpl.java @@ -23,6 +23,7 @@ import javax.annotation.Nullable; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.tez.common.RPCUtil; import org.apache.tez.dag.api.SessionNotRunning; @@ -47,6 +48,7 @@ import org.apache.tez.dag.api.client.VertexStatus; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetDAGStatusRequestProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetVertexStatusRequestProto; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetWebUIAddressRequestProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.TryKillDAGRequestProto; import com.google.common.annotations.VisibleForTesting; @@ -68,13 +70,16 @@ public class DAGClientRPCImpl extends DAGClientInternal { @VisibleForTesting DAGClientAMProtocolBlockingPB proxy = null; + private UserGroupInformation ugi; + public DAGClientRPCImpl(ApplicationId appId, String dagId, - TezConfiguration conf, @Nullable FrameworkClient frameworkClient) { + TezConfiguration conf, @Nullable FrameworkClient frameworkClient, UserGroupInformation ugi) { this.appId = appId; this.dagId = dagId; this.conf = conf; this.frameworkClient = frameworkClient; appReport = null; + this.ugi = ugi; } @Override @@ -142,9 +147,7 @@ public String getSessionIdentifierString() { @Override public void tryKillDAG() throws TezException, IOException { - if(LOG.isDebugEnabled()) { - LOG.debug("TryKill for app: " + appId + " dag:" + dagId); - } + LOG.debug("TryKill for app: {} dag:{}", appId, dagId); try { if 
(createAMProxyIfNeeded()) { TryKillDAGRequestProto requestProto = @@ -182,9 +185,7 @@ void resetProxy(Exception e) { DAGStatus getDAGStatusViaAM(Set statusOptions, long timeout) throws IOException, TezException { - if(LOG.isDebugEnabled()) { - LOG.debug("GetDAGStatus via AM for app: " + appId + " dag:" + dagId); - } + LOG.debug("GetDAGStatus via AM for app: {} dag:{}", appId, dagId); GetDAGStatusRequestProto.Builder requestProtoBuilder = GetDAGStatusRequestProto.newBuilder() .setDagId(dagId).setTimeout(timeout); @@ -286,7 +287,7 @@ boolean createAMProxyIfNeeded() throws IOException, TezException, } proxy = TezClientUtils.getAMProxy(conf, appReport.getHost(), appReport.getRpcPort(), - appReport.getClientToAMToken()); + appReport.getClientToAMToken(), ugi); return true; } @@ -303,4 +304,15 @@ public DAGStatus waitForCompletionWithStatusUpdates(@Nullable Set throw new TezException("not supported"); } + @Override + public String getWebUIAddress() throws IOException, TezException { + LOG.debug("getWebUIAddress via AM for app: {} dag: {}", appId, dagId); + GetWebUIAddressRequestProto.Builder requestProtoBuilder = GetWebUIAddressRequestProto.newBuilder(); + try { + return proxy.getWebUIAddress(null, requestProtoBuilder.build()).getWebUiAddress(); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + throw new TezException(e); + } + } } diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/Input.java b/tez-api/src/main/java/org/apache/tez/runtime/api/Input.java index bda439616e..a901d8aa7a 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/Input.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/Input.java @@ -26,7 +26,7 @@ * * This interface has methods which can be used by a {@link org.apache.tez.runtime.api.Processor} * to control execution of this Input and read data from it. - * + * * Actual implementations are expected to derive from {@link AbstractLogicalInput} */ @Public @@ -36,17 +36,17 @@ public interface Input { /** * Start any processing that the Input may need to perform. It is the * responsibility of the Processor to start Inputs. - * + * * This typically acts as a signal to Inputs to start any Processing that they - * may required. A blocking implementation of this method should not be used + * may require. A blocking implementation of this method should not be used * as a mechanism to determine when an Input is actually ready. - * + * * This method may be invoked by the framework under certain circumstances, * and as such requires the implementation to be non-blocking. - * + * * Inputs must be written to handle multiple start invocations - typically * honoring only the first one. - * + * * @throws Exception */ public void start() throws Exception; diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/InputContext.java b/tez-api/src/main/java/org/apache/tez/runtime/api/InputContext.java index 479a7dba1b..6eac2dfc95 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/InputContext.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/InputContext.java @@ -32,6 +32,12 @@ public interface InputContext extends TaskContext { * @return Name of the Source Vertex */ public String getSourceVertexName(); + + /** + * Returns a convenient, human-readable string describing the input and output vertices. + * @return the convenient string + */ + String getInputOutputVertexNames(); /** * Get the index of the input in the set of all inputs for the task. 
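With the RPC stub above, the getWebUIAddress() path is wired end to end. A trivial client-side use, again assuming a connected DAGClient named client (note that the timeline-backed client deliberately throws for this call, as its hunk earlier shows):

    String address = client.getWebUIAddress();
    System.out.println("Tez AM web UI: " + address);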
The diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/InputInitializerContext.java b/tez-api/src/main/java/org/apache/tez/runtime/api/InputInitializerContext.java index 6a123cf1a1..5cb2d221b3 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/InputInitializerContext.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/InputInitializerContext.java @@ -24,8 +24,10 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.UserPayload; import org.apache.tez.dag.api.event.VertexState; import org.apache.tez.dag.api.event.VertexStateUpdate; @@ -48,7 +50,13 @@ public interface InputInitializerContext { * @return DAG name */ String getDAGName(); - + + /** + * Get vertex configuration + * @return Vertex configuration + */ + Configuration getVertexConfiguration(); + /** * Get the name of the input * @return Input name @@ -79,7 +87,12 @@ public interface InputInitializerContext { * @return Resource */ Resource getVertexTaskResource(); - + + /** + * Get the id of the vertex this input belongs to, as an integer. + */ + int getVertexId(); + /** * Get the total resource allocated to this vertex. If the DAG is running in * a busy cluster then it may have no resources available dedicated to it. The @@ -117,4 +130,10 @@ public interface InputInitializerContext { */ void registerForVertexStateUpdates(String vertexName, @Nullable Set<VertexState> stateSet); + /** + * Add custom counters + * + * @param tezCounters counters to add + */ + void addCounters(TezCounters tezCounters); } diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/OutputCommitterContext.java b/tez-api/src/main/java/org/apache/tez/runtime/api/OutputCommitterContext.java index d254a6a99e..5b8906d520 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/OutputCommitterContext.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/OutputCommitterContext.java @@ -78,4 +78,6 @@ public interface OutputCommitterContext { */ public int getVertexIndex(); + public int getDagIdentifier(); + } diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/OutputContext.java b/tez-api/src/main/java/org/apache/tez/runtime/api/OutputContext.java index 882eb4be2a..f0de897fda 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/OutputContext.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/OutputContext.java @@ -19,6 +19,7 @@ package org.apache.tez.runtime.api; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.yarn.event.EventHandler; /** * Context handle for the Output to initialize itself. @@ -32,20 +33,35 @@ public interface OutputContext extends TaskContext { * Output's data * @return Name of the Destination Vertex */ - public String getDestinationVertexName(); - + String getDestinationVertexName(); + + /** + * Returns a convenient, human-readable string describing the input and output vertices. + * @return the descriptive string + */ + String getInputOutputVertexNames(); +
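Taken together, the InputInitializerContext additions above let an initializer read vertex-level configuration and publish its own counters. A sketch from inside a hypothetical InputInitializer subclass; the configuration key and counter names are invented for illustration:

    @Override
    public List<Event> initialize() throws Exception {
      Configuration vertexConf = getContext().getVertexConfiguration();
      int splits = vertexConf.getInt("example.num.splits", 1);  // invented key

      TezCounters counters = new TezCounters();
      counters.findCounter("ExampleInitializer", "SPLITS_GENERATED").increment(splits);
      getContext().addCounters(counters);  // publish to the DAG's counters

      return Collections.emptyList();  // no input events in this sketch
    }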
/** * Get the index of the output in the set of all outputs for the task. The * index will be consistent and valid only among the tasks of this vertex. * @return index */ - public int getOutputIndex(); + int getOutputIndex(); /** * Get an {@link OutputStatisticsReporter} for this {@link Output} that can * be used to report statistics like data size * @return {@link OutputStatisticsReporter} */ - public OutputStatisticsReporter getStatisticsReporter(); + OutputStatisticsReporter getStatisticsReporter(); + + /** + * Notify the context that at this point no more events should be sent. + * This is used as a safety measure to prevent events being sent after close + * or in cleanup. After this is called, events queued to be sent to the + * AM will instead be passed to the event handler. + * @param eventHandler should handle the events after the call. + */ + void trapEvents(EventHandler eventHandler); } diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/ProcessorContext.java b/tez-api/src/main/java/org/apache/tez/runtime/api/ProcessorContext.java index acb2a57da7..3782a8d8d7 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/ProcessorContext.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/ProcessorContext.java @@ -22,6 +22,7 @@ import java.util.Collection; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.tez.common.ProgressHelper; /** * Context handle for the Processor to initialize itself. @@ -30,13 +31,32 @@ @Public public interface ProcessorContext extends TaskContext { + /** + * Validates that progress is within the valid range. + * @param progress the raw progress value + * @return the processed value of the progress that is guaranteed to be within + * the valid range. + */ + static float preProcessProgress(float progress) { + return ProgressHelper.processProgress(progress); + } + /** * Set the overall progress of this Task Attempt. * This automatically results in invocation of {@link ProcessorContext#notifyProgress()} * and so invoking that separately is not required. * @param progress Progress in the range from [0.0 - 1.0f] */ - public void setProgress(float progress); + default void setProgress(float progress) { + setProgressInternally(preProcessProgress(progress)); + }
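The default-method split above keeps range handling in the interface while implementations only supply setProgressInternally(). The exact normalization lives in ProgressHelper.processProgress; a plausible equivalent of what such a helper does (an assumption, not the verbatim Tez code) is:

    // NaN is treated as no progress, and values are clamped into [0.0f, 1.0f].
    static float processProgress(float progress) {
      if (Float.isNaN(progress)) {
        return 0.0f;
      }
      return Math.max(0.0f, Math.min(1.0f, progress));
    }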
+ + /** + * The actual implementation of the taskAttempt progress. + * All implementations need to override this method. + * @param progress the validated progress value + */ + void setProgressInternally(float progress); /** * Check whether this attempt can commit its output diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/TaskContext.java b/tez-api/src/main/java/org/apache/tez/runtime/api/TaskContext.java index dd2951a382..004295ddad 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/TaskContext.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/TaskContext.java @@ -27,6 +27,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.UserPayload; @@ -62,6 +63,12 @@ public interface TaskContext { */ public int getTaskAttemptNumber(); + /** + * Get container configuration + * @return Container configuration + */ + public Configuration getContainerConfiguration(); + /** * Get the name of the DAG * @return the DAG name @@ -73,9 +80,9 @@ public interface TaskContext { * @return Vertex Name */ public String getTaskVertexName(); - + /** - * Get the index of this task's vertex in the set of vertices in the DAG. This + * Get the index of this task's vertex in the set of vertices in the DAG. This * is consistent and valid across all tasks/vertices in the same DAG. * @return index */ @@ -112,24 +119,24 @@ public interface TaskContext { /** * Returns an identifier which is unique to the specific Input, Processor or * Output - * + * * @return a unique identifier */ public String getUniqueIdentifier(); - + /** - * Returns a shared {@link ObjectRegistry} to hold user objects in memory - * between tasks. + * Returns a shared {@link ObjectRegistry} to hold user objects in memory + * between tasks. * @return {@link ObjectRegistry} */ public ObjectRegistry getObjectRegistry(); - + /** - * Notifies the framework that progress is being made by this component. + * Notifies the framework that progress is being made by this component. * This is used to identify hung components that are not making progress. * Must be called periodically until processing has completed for this component. - * Care must be taken to call this when real progress has been made. Simply - * calling this continuously from a thread without regard to real work may prevent + * Care must be taken to call this when real progress has been made. Simply + * calling this continuously from a thread without regard to real work may prevent * identification of hung components and delay/stall job completion. */ public void notifyProgress(); @@ -191,34 +198,34 @@ public interface TaskContext { */ @Nullable public ByteBuffer getServiceProviderMetaData(String serviceName); - + /** * Request a specific amount of memory during initialization * (initialize(..*Context)) The requester is notified of allocation via the * provided callback handler. - + * Currently, (post TEZ-668) the caller will be informed about the available * memory after initialization (I/P/O initialize(...)), and before the * start/run invocation. There will be no other invocations on the callback. - + * This method can be called only once by any component. Calling it multiple * times from within the same component will result in an error. - + * Each Input / Output must request memory.
For Inputs / Outputs which do not * have a specific ask, a null callback handler can be specified with a * request size of 0. - * + * * @param size * request size in bytes. * @param callbackHandler * the callback handler to be invoked once memory is assigned */ public void requestInitialMemory(long size, MemoryUpdateCallback callbackHandler); - + /** * Gets the total memory available to all components of the running task. This * values will always be constant, and does not factor in any allocations. - * + * * @return the total available memory for all components of the task */ public long getTotalMemoryAvailableToTask(); @@ -241,8 +248,8 @@ public interface TaskContext { * might not be guaranteed. The service returned works with tez framework, currently it provides * thread reuse across tasks. * Note: This is an unstable api, and is not recommended to be used by external users. Please wait - * until API and code is stablized by use in Tez processors, input and outputs. - * @param parallelism The expected parallelism for for this ExecutorService. + * until API and code is stabilized by use in Tez processors, input and outputs. + * @param parallelism The expected parallelism for this ExecutorService. * @param threadNameFormat The thread name format, format will be given one parameter, threadId. * @return An ExecutorService instance. */ diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/CustomProcessorEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/CustomProcessorEvent.java new file mode 100644 index 0000000000..7d84a1d79d --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/CustomProcessorEvent.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
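The new file below introduces CustomProcessorEvent, the payload carrier for the sendEventToProcessor API added to VertexManagerPluginContext earlier in this patch. By way of illustration (not part of the patch), a VertexManagerPlugin subclass might route a payload to task 0 of its managed vertex like this; the payload contents are invented:

    // Fragment from a hypothetical VertexManagerPlugin subclass:
    ByteBuffer payload = ByteBuffer.wrap("rebalance".getBytes(StandardCharsets.UTF_8));
    CustomProcessorEvent event = CustomProcessorEvent.create(payload);
    getContext().sendEventToProcessor(Collections.singletonList(event), 0);

On the receiving side, the processor sees the event through its normal event handling, and getPayload() returns a read-only view of the buffer.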
+ */ + +package org.apache.tez.runtime.api.events; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.tez.runtime.api.Event; + +import java.nio.ByteBuffer; + +public class CustomProcessorEvent extends Event { + private ByteBuffer payload; + + /** + * Version number to indicate what app attempt generated this Event + */ + private int version; + + private CustomProcessorEvent(ByteBuffer payload) { + this(payload, -1); + } + + private CustomProcessorEvent(ByteBuffer payload, int version) { + this.payload = payload; + this.version = version; + } + + public static CustomProcessorEvent create(ByteBuffer payload) { + return new CustomProcessorEvent(payload); + } + + @Private + public static CustomProcessorEvent create(ByteBuffer payload, int version) { + return new CustomProcessorEvent(payload, version); + } + + public ByteBuffer getPayload() { + return payload.asReadOnlyBuffer(); + } + + @Private + public void setVersion(int version) { + this.version = version; + } + + public int getVersion() { + return version; + } +} diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputDataInformationEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputDataInformationEvent.java index a62a34154f..8cf0616e6b 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputDataInformationEvent.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputDataInformationEvent.java @@ -49,8 +49,8 @@ public final class InputDataInformationEvent extends Event { private final int sourceIndex; private int targetIndex; // TODO Likely to be multiple at a later point. private final ByteBuffer userPayload; + private String serializedPath; private final Object userPayloadObject; - private InputDataInformationEvent(int srcIndex, ByteBuffer userPayload) { this.sourceIndex = srcIndex; @@ -79,6 +79,12 @@ public static InputDataInformationEvent createWithObjectPayload(int srcIndex, return new InputDataInformationEvent(srcIndex, userPayloadDeserialized, null); } + public static InputDataInformationEvent createWithSerializedPath(int srcIndex, String serializedPath) { + InputDataInformationEvent event = new InputDataInformationEvent(srcIndex, null); + event.serializedPath = serializedPath; + return event; + } + public int getSourceIndex() { return this.sourceIndex; } @@ -90,19 +96,29 @@ public int getTargetIndex() { public void setTargetIndex(int target) { this.targetIndex = target; } - + + public String getSerializedPath() { + return serializedPath; + } + public ByteBuffer getUserPayload() { return userPayload == null ? 
null : userPayload.asReadOnlyBuffer(); } - + public Object getDeserializedUserPayload() { return this.userPayloadObject; } - @Override public String toString() { - return "InputDataInformationEvent [sourceIndex=" + sourceIndex + ", targetIndex=" - + targetIndex + ", serializedUserPayloadExists=" + (userPayload != null) - + ", deserializedUserPayloadExists=" + (userPayloadObject != null) + "]"; - } + StringBuilder sb = new StringBuilder(); + sb.append("InputDataInformationEvent [sourceIndex=").append(sourceIndex) + .append(", targetIndex=").append(targetIndex) + .append(", serializedUserPayloadExists=").append(userPayload != null) + .append(", deserializedUserPayloadExists=").append(userPayloadObject != null); + if (serializedPath != null) { + sb.append(", serializedPath=").append(serializedPath); + } + sb.append("]"); + return sb.toString(); + } } diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputInitializerEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputInitializerEvent.java index 3037e619b5..21ad130c88 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputInitializerEvent.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputInitializerEvent.java @@ -21,8 +21,7 @@ package org.apache.tez.runtime.api.events; import java.nio.ByteBuffer; - -import com.google.common.base.Preconditions; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Public; @@ -53,8 +52,8 @@ public class InputInitializerEvent extends Event { private InputInitializerEvent(String targetVertexName, String targetInputName, ByteBuffer eventPayload) { - Preconditions.checkNotNull(targetVertexName, "TargetVertexName cannot be null"); - Preconditions.checkNotNull(targetInputName, "TargetInputName cannot be null"); + Objects.requireNonNull(targetVertexName, "TargetVertexName cannot be null"); + Objects.requireNonNull(targetInputName, "TargetInputName cannot be null"); this.targetVertexName = targetVertexName; this.targetInputName = targetInputName; this.eventPayload = eventPayload; diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputReadErrorEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputReadErrorEvent.java index 7d2e0d25a8..6c9f1fbe6b 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputReadErrorEvent.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputReadErrorEvent.java @@ -21,6 +21,10 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.tez.runtime.api.Event; +import com.google.common.annotations.VisibleForTesting; + +import java.util.Objects; + /** * Event generated by an Input to indicate error when trying to retrieve data. * This is not necessarily a fatal event - it's an indication to the AM to retry @@ -44,17 +48,63 @@ public final class InputReadErrorEvent extends Event { */ private final int version; - private InputReadErrorEvent(String diagnostics, int index, - int version) { + /** + * Number of failures. + */ + private final int numFailures; + + /** + * Whether this input read error is caused while fetching local file. + */ + private final boolean isLocalFetch; + + /** + * Whether this input read error is caused because the fetcher detected a fatal, unrecoverable, + * local file read issue from the shuffle handler. 
+ */ + private final boolean isDiskErrorAtSource; + + /** + * The localhostName of the destination task attempt. + */ + private final String destinationLocalhostName; + + private InputReadErrorEvent(final String diagnostics, final int index, final int version, + final int numFailures, boolean isLocalFetch, boolean isDiskErrorAtSource, String destinationLocalhostName) { super(); this.diagnostics = diagnostics; this.index = index; this.version = version; + this.numFailures = numFailures; + this.isLocalFetch = isLocalFetch; + this.isDiskErrorAtSource = isDiskErrorAtSource; + this.destinationLocalhostName = destinationLocalhostName; + } + + /** + * Creates an InputReadErrorEvent with fewer parameters; intended to be used from tests only. + * @param diagnostics + * @param index + * @param version + * @return InputReadErrorEvent instance + */ + @VisibleForTesting + public static InputReadErrorEvent create(String diagnostics, int index, int version) { + return create(diagnostics, index, version, 1, false, false, null); } - public static InputReadErrorEvent create(String diagnostics, int index, - int version) { - return new InputReadErrorEvent(diagnostics, index, version); + public static InputReadErrorEvent create(String diagnostics, int index, int version, boolean isLocalFetch, + boolean isDiskErrorAtSource, String destinationLocalhostName) { + return create(diagnostics, index, version, 1, isLocalFetch, isDiskErrorAtSource, destinationLocalhostName); + } + + /** + * Create an InputReadErrorEvent. + */ + public static InputReadErrorEvent create(final String diagnostics, final int index, final int version, + final int numFailures, boolean isLocalFetch, boolean isDiskErrorAtSource, String destinationLocalhostName) { + return new InputReadErrorEvent(diagnostics, index, version, numFailures, isLocalFetch, isDiskErrorAtSource, + destinationLocalhostName); } public String getDiagnostics() { @@ -69,4 +119,39 @@ public int getVersion() { return version; } + /** + * @return number of failures + */ + public int getNumFailures() { + return numFailures; + } + + public boolean isLocalFetch() { + return isLocalFetch; + } + + public boolean isDiskErrorAtSource() { + return isDiskErrorAtSource; + } + + public String getDestinationLocalhostName() { + return destinationLocalhostName; + } + + @Override + public int hashCode() { + return Objects.hash(index, version); + } + + @Override + public boolean equals(final Object o) { if (this == o) { return true; } + if (o == null || getClass() != o.getClass()) { + return false; + } + InputReadErrorEvent that = (InputReadErrorEvent) o; + return index == that.index && version == that.version; + } } diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputUpdatePayloadEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputUpdatePayloadEvent.java index 2cfec69589..8c336354d9 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputUpdatePayloadEvent.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/InputUpdatePayloadEvent.java @@ -19,14 +19,13 @@ package org.apache.tez.runtime.api.events; import java.nio.ByteBuffer; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.tez.runtime.api.Event; import org.apache.tez.runtime.api.InputInitializer; -import com.google.common.base.Preconditions; - /** * Events used by {@link InputInitializer} implementations to update the * shared user
payload for the Input that is being initialized.
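Stepping back to the InputReadErrorEvent changes above: a minimal, self-contained sketch of how the extended factory might be exercised, and of what the new (index, version)-only equality buys the AM when deduplicating repeated failure reports. The host names and counts below are illustrative values, not part of this patch.

import org.apache.tez.runtime.api.events.InputReadErrorEvent;

public class InputReadErrorEventSketch {
  public static void main(String[] args) {
    // Full factory: diagnostics, index, version, numFailures,
    // isLocalFetch, isDiskErrorAtSource, destinationLocalhostName.
    InputReadErrorEvent first = InputReadErrorEvent.create(
        "fetch failed", 3, 0, 2, false, true, "worker-1.example.com");

    // A later report for the same (index, version) with different metadata.
    InputReadErrorEvent second = InputReadErrorEvent.create(
        "fetch failed again", 3, 0, 5, true, false, "worker-2.example.com");

    // equals()/hashCode() consider only index and version, so both reports
    // collapse to one logical failure of the same input, while the retained
    // instance still carries numFailures and the disk-error hint.
    System.out.println(first.equals(second));                  // true
    System.out.println(first.hashCode() == second.hashCode()); // true
    System.out.println(first.getNumFailures());                // 2
  }
}

Presumably the isDiskErrorAtSource flag, fed by the ShuffleHandlerError signal introduced later in this patch, lets the AM blame the producing side rather than endlessly retrying the fetch.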

@@ -41,7 +40,7 @@ public class InputUpdatePayloadEvent extends Event { private final ByteBuffer userPayload; private InputUpdatePayloadEvent(ByteBuffer userPayload) { - Preconditions.checkNotNull(userPayload); + Objects.requireNonNull(userPayload); this.userPayload = userPayload; } diff --git a/tez-api/src/main/java/org/apache/tez/runtime/api/events/VertexManagerEvent.java b/tez-api/src/main/java/org/apache/tez/runtime/api/events/VertexManagerEvent.java index 9e73fe5d00..7a450c6cdf 100644 --- a/tez-api/src/main/java/org/apache/tez/runtime/api/events/VertexManagerEvent.java +++ b/tez-api/src/main/java/org/apache/tez/runtime/api/events/VertexManagerEvent.java @@ -19,6 +19,7 @@ package org.apache.tez.runtime.api.events; import java.nio.ByteBuffer; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; @@ -27,8 +28,6 @@ import org.apache.tez.runtime.api.Event; import org.apache.tez.runtime.api.TaskAttemptIdentifier; -import com.google.common.base.Preconditions; - /** * Event used to send information from a Task to the VertexManager for a vertex. * This may be used to send statistics like samples etc to the VertexManager for @@ -50,11 +49,17 @@ public class VertexManagerEvent extends Event { */ private final ByteBuffer userPayload; + /** + * Constructor. + * + * @param vertexName + * @param userPayload + * @throws NullPointerException if {@code vertexName} or {@code userPayload} + * is {@code null} + */ private VertexManagerEvent(String vertexName, ByteBuffer userPayload) { - Preconditions.checkArgument(vertexName != null); - Preconditions.checkArgument(userPayload != null); - this.targetVertexName = vertexName; - this.userPayload = userPayload; + this.targetVertexName = Objects.requireNonNull(vertexName); + this.userPayload = Objects.requireNonNull(userPayload); } /** diff --git a/tez-api/src/main/java/org/apache/tez/runtime/library/common/shuffle/api/ShuffleHandlerError.java b/tez-api/src/main/java/org/apache/tez/runtime/library/common/shuffle/api/ShuffleHandlerError.java new file mode 100644 index 0000000000..09137de673 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/runtime/library/common/shuffle/api/ShuffleHandlerError.java @@ -0,0 +1,27 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.tez.runtime.library.common.shuffle.api; + +/** + * ShuffleHandlerError enum encapsulates possible error messages that can be propagated from + * ShuffleHandler to fetchers. Depending on the message, fetchers can make better decisions, or give + * AM a hint in order to let it make better decisions in case of shuffle issues. 
+ */ +public enum ShuffleHandlerError { + DISK_ERROR_EXCEPTION +} diff --git a/tez-api/src/main/java/org/apache/tez/runtime/library/common/shuffle/api/package-info.java b/tez-api/src/main/java/org/apache/tez/runtime/library/common/shuffle/api/package-info.java new file mode 100644 index 0000000000..9ad8e61d50 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/runtime/library/common/shuffle/api/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@Private +package org.apache.tez.runtime.library.common.shuffle.api; + +import org.apache.hadoop.classification.InterfaceAudience.Private; \ No newline at end of file diff --git a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ContainerLauncherContext.java b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ContainerLauncherContext.java index ed1d58f78a..16d54f05b7 100644 --- a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ContainerLauncherContext.java +++ b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ContainerLauncherContext.java @@ -80,7 +80,7 @@ void containerCompleted(ContainerId containerId, int exitStatus, String diagnost * Get the number of nodes being handled by the specified source * * @param sourceName the relevant source name - * @return the initial payload + * @return the number of nodes */ int getNumNodes(String sourceName); diff --git a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/DagInfo.java b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/DagInfo.java index 4a8b9fa8ce..b05fa8bbb7 100644 --- a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/DagInfo.java +++ b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/DagInfo.java @@ -14,8 +14,11 @@ package org.apache.tez.serviceplugins.api; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.Credentials; +import java.util.BitSet; + public interface DagInfo { /** @@ -35,4 +38,10 @@ public interface DagInfo { * @return the credentials for the dag. 
*/ Credentials getCredentials(); + + int getTotalVertices(); + + BitSet getVertexDescendants(int vertexIndex); + + Configuration getConf(); } diff --git a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginsDescriptor.java b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginsDescriptor.java index 39d2cb8102..c942a3ad3a 100644 --- a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginsDescriptor.java +++ b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/ServicePluginsDescriptor.java @@ -16,7 +16,7 @@ import java.util.Arrays; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.tez.dag.api.TezConfiguration; diff --git a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskScheduler.java b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskScheduler.java index b28a684de5..42ff56f2ba 100644 --- a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskScheduler.java +++ b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskScheduler.java @@ -14,6 +14,8 @@ package org.apache.tez.serviceplugins.api; +import java.util.List; + import javax.annotation.Nullable; import org.apache.hadoop.classification.InterfaceAudience; @@ -263,4 +265,19 @@ public abstract boolean deallocateTask(Object task, boolean taskSucceeded, */ public abstract void dagComplete() throws ServicePluginException; + /** + * Get the number of held containers. + */ + public int getHeldContainersCount() { + return 0; + } + + /** + * Callback to be used in the event of a container allocation. + */ + protected void onContainersAllocated(List<Container> containers) { + for (Container container : containers) { + getContext().containerAllocated(container); + } + } } diff --git a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskSchedulerContext.java b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskSchedulerContext.java index 97fe7ae834..74342e2807 100644 --- a/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskSchedulerContext.java +++ b/tez-api/src/main/java/org/apache/tez/serviceplugins/api/TaskSchedulerContext.java @@ -84,6 +84,20 @@ void taskAllocated(Object task, Object appCookie, Container container); + /** + * Indicate to the framework that a container is being allocated. + * + * @param container the actual container + */ + void containerAllocated(Container container); + + /** + * Indicate to the framework that a container is being reused: + * there is a task assigned to an already used container. + * + * @param container the actual container + */ + void containerReused(Container container); /** * Indicate to the framework that a container has completed.
This is typically used by sources @@ -220,4 +234,5 @@ void setApplicationRegistrationData( */ AMState getAMState(); + int getVertexIndexForTask(Object task); } diff --git a/tez-api/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-api/src/main/javadoc/resources/META-INF/LICENSE similarity index 100% rename from tez-api/src/main/javadoc/resources/META-INF/LICENSE.txt rename to tez-api/src/main/javadoc/resources/META-INF/LICENSE diff --git a/tez-api/src/main/resources/META-INF/NOTICE.txt b/tez-api/src/main/javadoc/resources/META-INF/NOTICE similarity index 69% rename from tez-api/src/main/resources/META-INF/NOTICE.txt rename to tez-api/src/main/javadoc/resources/META-INF/NOTICE index 3f36fcc6ba..2595905699 100644 --- a/tez-api/src/main/resources/META-INF/NOTICE.txt +++ b/tez-api/src/main/javadoc/resources/META-INF/NOTICE @@ -1,5 +1,5 @@ Apache Tez -Copyright (c) 2016 The Apache Software Foundation +Copyright 2014-2024 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/tez-api/src/main/proto/DAGApiRecords.proto b/tez-api/src/main/proto/DAGApiRecords.proto index c84094b51e..15f681db96 100644 --- a/tez-api/src/main/proto/DAGApiRecords.proto +++ b/tez-api/src/main/proto/DAGApiRecords.proto @@ -227,6 +227,7 @@ message ProgressProto { optional int32 killedTaskCount = 5; optional int32 failedTaskAttemptCount = 6; optional int32 killedTaskAttemptCount = 7; + optional int32 rejectedTaskAttemptCount = 8; } enum VertexStatusStateProto { @@ -244,10 +245,11 @@ enum VertexStatusStateProto { } message VertexStatusProto { - optional VertexStatusStateProto state = 1; - repeated string diagnostics = 2; - optional ProgressProto progress = 3; - optional TezCountersProto vertexCounters = 4; + required string id = 1; + optional VertexStatusStateProto state = 2; + repeated string diagnostics = 3; + optional ProgressProto progress = 4; + optional TezCountersProto vertexCounters = 5; } enum DAGStatusStateProto { @@ -273,6 +275,8 @@ message DAGStatusProto { optional ProgressProto DAGProgress = 3; repeated StringProgressPairProto vertexProgress = 4; optional TezCountersProto dagCounters = 5; + optional int64 memoryUsedByAM = 6; + optional int64 memoryUsedByTasks = 7; } message PlanLocalResourcesProto { @@ -297,6 +301,7 @@ message TezCountersProto { enum StatusGetOptsProto { GET_COUNTERS = 0; + GET_MEMORY_USAGE = 1; } message VertexLocationHintProto { diff --git a/tez-api/src/main/proto/DAGClientAMProtocol.proto b/tez-api/src/main/proto/DAGClientAMProtocol.proto index 113c9ccfce..f0ff3916ea 100644 --- a/tez-api/src/main/proto/DAGClientAMProtocol.proto +++ b/tez-api/src/main/proto/DAGClientAMProtocol.proto @@ -90,6 +90,13 @@ message GetAMStatusResponseProto { required TezAppMasterStatusProto status = 1; } +message GetWebUIAddressRequestProto { +} + +message GetWebUIAddressResponseProto { + required string web_ui_address = 1; +} + service DAGClientAMProtocol { rpc getAllDAGs (GetAllDAGsRequestProto) returns (GetAllDAGsResponseProto); rpc getDAGStatus (GetDAGStatusRequestProto) returns (GetDAGStatusResponseProto); @@ -98,4 +105,5 @@ service DAGClientAMProtocol { rpc submitDAG (SubmitDAGRequestProto) returns (SubmitDAGResponseProto); rpc shutdownSession (ShutdownSessionRequestProto) returns (ShutdownSessionResponseProto); rpc getAMStatus (GetAMStatusRequestProto) returns (GetAMStatusResponseProto); + rpc getWebUIAddress (GetWebUIAddressRequestProto) returns (GetWebUIAddressResponseProto); } diff --git 
a/tez-api/src/main/proto/Events.proto b/tez-api/src/main/proto/Events.proto index e018864939..05896ac62e 100644 --- a/tez-api/src/main/proto/Events.proto +++ b/tez-api/src/main/proto/Events.proto @@ -39,6 +39,9 @@ message InputReadErrorEventProto { optional int32 index = 1; optional string diagnostics = 2; optional int32 version = 3; + optional bool is_local_fetch = 4; + optional bool is_disk_error_at_source = 5; + optional string destination_localhost_name = 6; } message InputFailedEventProto { @@ -55,6 +58,7 @@ message RootInputDataInformationEventProto { optional int32 source_index = 1; optional int32 target_index = 2; optional bytes user_payload = 3; + optional bytes serialized_path = 4; } message CompositeEventProto { @@ -69,3 +73,8 @@ message RootInputInitializerEventProto { optional string target_input_name = 2; optional bytes user_payload = 3; } + +message CustomProcessorEventProto { + optional bytes user_payload = 1; + required int32 version = 2; +} diff --git a/tez-api/src/main/resources/META-INF/LICENSE.txt b/tez-api/src/main/resources/META-INF/LICENSE similarity index 100% rename from tez-api/src/main/resources/META-INF/LICENSE.txt rename to tez-api/src/main/resources/META-INF/LICENSE diff --git a/NOTICE.txt b/tez-api/src/main/resources/META-INF/NOTICE similarity index 69% rename from NOTICE.txt rename to tez-api/src/main/resources/META-INF/NOTICE index 3f36fcc6ba..2595905699 100644 --- a/NOTICE.txt +++ b/tez-api/src/main/resources/META-INF/NOTICE @@ -1,5 +1,5 @@ Apache Tez -Copyright (c) 2016 The Apache Software Foundation +Copyright 2014-2024 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/tez-api/src/main/resources/tez-api-version-info.properties b/tez-api/src/main/resources/tez-api-version-info.properties index 4bb7d40b8b..0bc30c4bb0 100644 --- a/tez-api/src/main/resources/tez-api-version-info.properties +++ b/tez-api/src/main/resources/tez-api-version-info.properties @@ -19,4 +19,6 @@ version=${pom.version} revision=${buildNumber} buildtime=${build.time} +builduser=${user.name} +buildjavaversion=${java.version} scmurl=${scm.url} diff --git a/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java b/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java index 89310df83e..1b93f924a2 100644 --- a/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java +++ b/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java @@ -18,6 +18,7 @@ package org.apache.tez.client; +import java.io.File; import java.io.IOException; import java.net.InetAddress; import java.nio.ByteBuffer; @@ -37,13 +38,14 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; import static org.mockito.Mockito.RETURNS_DEEP_STUBS; -import static org.mockito.Mockito.times; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.doCallRealMethod; +import static org.mockito.Mockito.isNull; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -54,6 +56,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Time; 
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; @@ -65,7 +68,6 @@ import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.client.api.YarnClient; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.tez.common.counters.LimitExceededException; @@ -87,9 +89,12 @@ import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolBlockingPB; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetAMStatusRequestProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetAMStatusResponseProto; -import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.ShutdownSessionRequestProto; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetDAGStatusResponseProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.SubmitDAGRequestProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.TezAppMasterStatusProto; +import org.apache.tez.dag.api.records.DAGProtos.DAGStatusProto; +import org.apache.tez.dag.api.records.DAGProtos.DAGStatusStateProto; +import org.apache.tez.dag.api.records.DAGProtos.ProgressProto; import org.apache.tez.serviceplugins.api.ServicePluginsDescriptor; import org.hamcrest.CoreMatchers; import org.junit.Assert; @@ -101,35 +106,73 @@ public class TestTezClient { static final long HARD_KILL_TIMEOUT = 1500L; + private static final File STAGING_DIR = new File(System.getProperty("test.build.data"), + TestTezClient.class.getName()).getAbsoluteFile(); class TezClientForTest extends TezClient { - TezYarnClient mockTezYarnClient; DAGClientAMProtocolBlockingPB sessionAmProxy; YarnClient mockYarnClient; ApplicationId mockAppId; boolean callRealGetSessionAMProxy; + Long prewarmTimeoutMs; public TezClientForTest(String name, TezConfiguration tezConf, - @Nullable Map localResources, - @Nullable Credentials credentials) { + @Nullable Map localResources, @Nullable Credentials credentials) { super(name, tezConf, localResources, credentials); } - + @Override protected FrameworkClient createFrameworkClient() { - return mockTezYarnClient; + return frameworkClient; // already initialized } - + + public void setPrewarmTimeoutMs(Long prewarmTimeoutMs) { + this.prewarmTimeoutMs = prewarmTimeoutMs; + } + @Override - protected DAGClientAMProtocolBlockingPB getAMProxy(ApplicationId appId) - throws TezException, IOException { - if (!callRealGetSessionAMProxy) { - return sessionAmProxy; + protected long getPrewarmWaitTimeMs() { + return prewarmTimeoutMs == null ? 
super.getPrewarmWaitTimeMs() : prewarmTimeoutMs; + } + } + + class TezYarnClientForTest extends TezYarnClient { + private TezClientForTest client; + + protected TezYarnClientForTest(YarnClient yarnClient, TezClientForTest client) { + super(yarnClient); + this.client = client; + } + + @Override + protected DAGClientAMProtocolBlockingPB waitForProxy(long clientTimeout, Configuration conf, + ApplicationId sessionAppId, UserGroupInformation ugi) throws TezException, IOException { + if (!client.callRealGetSessionAMProxy) { + return client.sessionAmProxy; + } + return super.getProxy(conf, sessionAppId, ugi); + } + + @Override + protected DAGClientAMProtocolBlockingPB getProxy(Configuration conf, ApplicationId sessionAppId, + UserGroupInformation ugi) throws TezException, IOException { + if (!client.callRealGetSessionAMProxy) { + return client.sessionAmProxy; } - return super.getAMProxy(appId); + return super.getProxy(conf, sessionAppId, ugi); + } + + @Override + public String getAmHost() { + return "testhost"; + } + + @Override + public int getAmPort() { + return 1234; } } - + TezClientForTest configureAndCreateTezClient() throws YarnException, IOException, ServiceException { return configureAndCreateTezClient(null); } @@ -146,34 +189,36 @@ TezClientForTest configureAndCreateTezClient(Map lrs, boo } conf.setBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, true); conf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, isSession); + conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath()); TezClientForTest client = new TezClientForTest("test", conf, lrs, null); ApplicationId appId1 = ApplicationId.newInstance(0, 1); YarnClient yarnClient = mock(YarnClient.class, RETURNS_DEEP_STUBS); when(yarnClient.createApplication().getNewApplicationResponse().getApplicationId()).thenReturn(appId1); when(yarnClient.getApplicationReport(appId1).getYarnApplicationState()).thenReturn(YarnApplicationState.NEW); - when(yarnClient.submitApplication(any(ApplicationSubmissionContext.class))).thenReturn(appId1); + when(yarnClient.submitApplication(any())).thenReturn(appId1); DAGClientAMProtocolBlockingPB sessionAmProxy = mock(DAGClientAMProtocolBlockingPB.class, RETURNS_DEEP_STUBS); - when(sessionAmProxy.getAMStatus(any(RpcController.class), any(GetAMStatusRequestProto.class))) + when(sessionAmProxy.getAMStatus(any(), any())) + .thenReturn(GetAMStatusResponseProto.newBuilder().setStatus(TezAppMasterStatusProto.RUNNING).build()) .thenReturn(GetAMStatusResponseProto.newBuilder().setStatus(TezAppMasterStatusProto.RUNNING).build()); client.sessionAmProxy = sessionAmProxy; - client.mockTezYarnClient = new TezYarnClient(yarnClient); + client.frameworkClient = new TezYarnClientForTest(yarnClient, client); client.mockYarnClient = yarnClient; client.mockAppId = appId1; - - return client; + + return client; } @Test (timeout = 5000) - public void testTezclientApp() throws Exception { - testTezClient(false); + public void testTezClientApp() throws Exception { + testTezClient(false, true, "testTezClientApp"); } @Test (timeout = 5000) - public void testTezclientSession() throws Exception { - testTezClient(true); + public void testTezClientSession() throws Exception { + testTezClient(true, true, "testTezClientSession"); } @Test (timeout = 5000) @@ -211,7 +256,7 @@ private void _testTezClientSessionLargeDAGPlan(int maxIPCMsgSize, int payloadSiz ProcessorDescriptor processorDescriptor = ProcessorDescriptor.create("P"); processorDescriptor.setUserPayload(UserPayload.create(ByteBuffer.allocate(payloadSize))); Vertex vertex = 
Vertex.create("Vertex", processorDescriptor, 1, Resource.newInstance(1, 1)); - DAG dag = DAG.create("DAG").addVertex(vertex); + DAG dag = DAG.create("DAG-testTezClientSessionLargeDAGPlan").addVertex(vertex); client.start(); client.addAppMasterLocalFiles(localResourceMap); @@ -219,7 +264,7 @@ private void _testTezClientSessionLargeDAGPlan(int maxIPCMsgSize, int payloadSiz client.stop(); ArgumentCaptor captor = ArgumentCaptor.forClass(SubmitDAGRequestProto.class); - verify(client.sessionAmProxy).submitDAG((RpcController)any(), captor.capture()); + verify(client.sessionAmProxy).submitDAG(any(), captor.capture()); SubmitDAGRequestProto request = captor.getValue(); if (shouldSerialize) { @@ -238,8 +283,51 @@ private void _testTezClientSessionLargeDAGPlan(int maxIPCMsgSize, int payloadSiz assertTrue(request.hasAdditionalAmResources()); } } + + @Test (timeout = 5000) + public void testGetClient() throws Exception { + /* BEGIN first TezClient usage without calling stop() */ + TezClientForTest client = testTezClient(true, false, "testGetClient"); + /* END first TezClient usage without calling stop() */ + + /* BEGIN reuse of AM from new TezClient */ + ArgumentCaptor captor = ArgumentCaptor.forClass(ApplicationSubmissionContext.class); + when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState()) + .thenReturn(YarnApplicationState.RUNNING); + + //Reuse existing appId from first TezClient + ApplicationId existingAppId = client.mockAppId; + TezClientForTest client2 = configureAndCreateTezClient(null, true, + client.amConfig.getTezConfiguration()); + String mockLR1Name = "LR1"; + Map lrDAG = Collections.singletonMap(mockLR1Name, LocalResource + .newInstance(URL.newInstance("file", "localhost", 0, "/test1"), LocalResourceType.FILE, + LocalResourceVisibility.PUBLIC, 1, 1)); + Vertex vertex = Vertex.create("Vertex", ProcessorDescriptor.create("P"), 1, + Resource.newInstance(1, 1)); + DAG dag = DAG.create("DAG-testGetClient").addVertex(vertex).addTaskLocalFiles(lrDAG); + + //Bind TezClient to existing app and submit a dag + DAGClient dagClient = client2.getClient(existingAppId).submitDAG(dag); + + assertTrue(dagClient.getExecutionContext().contains(existingAppId.toString())); + assertEquals(dagClient.getSessionIdentifierString(), existingAppId.toString()); + + // Validate request for new AM is not submitted to RM */ + verify(client2.mockYarnClient, times(0)).submitApplication(captor.capture()); + + // Validate dag submission from second TezClient as normal */ + verify(client2.sessionAmProxy, times(1)).submitDAG(any(), any()); + + // Validate stop from new TezClient as normal */ + client2.stop(); + verify(client2.sessionAmProxy, times(1)).shutdownSession(any(), + any()); + verify(client2.mockYarnClient, times(1)).stop(); + /* END reuse of AM from new TezClient */ + } - public void testTezClient(boolean isSession) throws Exception { + public TezClientForTest testTezClient(boolean isSession, boolean shouldStop, String dagName) throws Exception { Map lrs = Maps.newHashMap(); String lrName1 = "LR1"; lrs.put(lrName1, LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test"), @@ -251,7 +339,7 @@ public void testTezClient(boolean isSession) throws Exception { when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState()) .thenReturn(YarnApplicationState.RUNNING); client.start(); - verify(client.mockYarnClient, times(1)).init((Configuration)any()); + verify(client.mockYarnClient, times(1)).init(any()); verify(client.mockYarnClient, 
times(1)).start(); if (isSession) { verify(client.mockYarnClient, times(1)).submitApplication(captor.capture()); @@ -273,7 +361,11 @@ public void testTezClient(boolean isSession) throws Exception { LocalResourceVisibility.PUBLIC, 1, 1)); Vertex vertex = Vertex.create("Vertex", ProcessorDescriptor.create("P"), 1, Resource.newInstance(1, 1)); - DAG dag = DAG.create("DAG").addVertex(vertex).addTaskLocalFiles(lrDAG); + DAG dag = DAG.create("DAG-" + dagName).addVertex(vertex).addTaskLocalFiles(lrDAG); + if (!isSession) { + when(client.sessionAmProxy.getAMStatus(any(), any())) + .thenReturn(GetAMStatusResponseProto.newBuilder().setStatus(TezAppMasterStatusProto.SHUTDOWN).build()); + } DAGClient dagClient = client.submitDAG(dag); assertTrue(dagClient.getExecutionContext().contains(client.mockAppId.toString())); @@ -281,7 +373,7 @@ public void testTezClient(boolean isSession) throws Exception { if (isSession) { verify(client.mockYarnClient, times(1)).submitApplication(captor.capture()); - verify(client.sessionAmProxy, times(1)).submitDAG((RpcController)any(), (SubmitDAGRequestProto) any()); + verify(client.sessionAmProxy, times(1)).submitDAG(any(), any()); } else { verify(client.mockYarnClient, times(1)).submitApplication(captor.capture()); ApplicationSubmissionContext context = captor.getValue(); @@ -309,7 +401,7 @@ public void testTezClient(boolean isSession) throws Exception { when(client.mockYarnClient.getApplicationReport(appId2).getYarnApplicationState()) .thenReturn(YarnApplicationState.RUNNING); - dag = DAG.create("DAG").addVertex( + dag = DAG.create("DAG-2-" + dagName).addVertex( Vertex.create("Vertex", ProcessorDescriptor.create("P"), 1, Resource.newInstance(1, 1))); dagClient = client.submitDAG(dag); @@ -320,7 +412,7 @@ public void testTezClient(boolean isSession) throws Exception { assertEquals(dagClient.getSessionIdentifierString(), client.mockAppId.toString()); // additional resource is sent ArgumentCaptor captor1 = ArgumentCaptor.forClass(SubmitDAGRequestProto.class); - verify(client.sessionAmProxy, times(2)).submitDAG((RpcController)any(), captor1.capture()); + verify(client.sessionAmProxy, times(2)).submitDAG(any(), captor1.capture()); SubmitDAGRequestProto proto = captor1.getValue(); Assert.assertEquals(1, proto.getAdditionalAmResources().getLocalResourcesCount()); Assert.assertEquals(lrName2, proto.getAdditionalAmResources().getLocalResources(0).getName()); @@ -343,13 +435,16 @@ public void testTezClient(boolean isSession) throws Exception { assertTrue(context.getAMContainerSpec().getLocalResources().containsKey( lrName2)); } - - client.stop(); - if (isSession) { - verify(client.sessionAmProxy, times(1)).shutdownSession((RpcController) any(), - (ShutdownSessionRequestProto) any()); + + if(shouldStop) { + client.stop(); + if (isSession) { + verify(client.sessionAmProxy, times(1)).shutdownSession(any(), + any()); + } + verify(client.mockYarnClient, times(1)).stop(); } - verify(client.mockYarnClient, times(1)).stop(); + return client; } @Test (timeout=5000) @@ -358,23 +453,54 @@ public void testPreWarm() throws Exception { client.start(); when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState()) - .thenReturn(YarnApplicationState.RUNNING); + .thenReturn(YarnApplicationState.RUNNING); when( - client.sessionAmProxy.getAMStatus((RpcController) any(), (GetAMStatusRequestProto) any())) + client.sessionAmProxy.getAMStatus(any(), any())) .thenReturn(GetAMStatusResponseProto.newBuilder().setStatus(TezAppMasterStatusProto.READY).build()); PreWarmVertex 
vertex = PreWarmVertex.create("PreWarm", 1, Resource.newInstance(1, 1)); client.preWarm(vertex); ArgumentCaptor captor1 = ArgumentCaptor.forClass(SubmitDAGRequestProto.class); - verify(client.sessionAmProxy, times(1)).submitDAG((RpcController)any(), captor1.capture()); + verify(client.sessionAmProxy, times(1)).submitDAG(any(), captor1.capture()); SubmitDAGRequestProto proto = captor1.getValue(); assertTrue(proto.getDAGPlan().getName().startsWith(TezConstants.TEZ_PREWARM_DAG_NAME_PREFIX)); + setClientToReportStoppedDags(client); + client.stop(); + } + + + @Test (timeout=5000) + public void testPreWarmCloseStuck() throws Exception { + TezClientForTest client = configureAndCreateTezClient(); + client.setPrewarmTimeoutMs(10L); // Don't wait too long. + client.start(); + + when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState()) + .thenReturn(YarnApplicationState.RUNNING); + when(client.sessionAmProxy.getAMStatus(any(), any())) + .thenReturn(GetAMStatusResponseProto.newBuilder().setStatus(TezAppMasterStatusProto.READY).build()); + + PreWarmVertex vertex = PreWarmVertex.create("PreWarm", 1, Resource.newInstance(1, 1)); + client.preWarm(vertex); + // Keep prewarm in "running" state. Client should give up waiting; if it doesn't, the test will time out. client.stop(); } + + private void setClientToReportStoppedDags(TezClientForTest client) throws Exception { + when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState()) + .thenReturn(YarnApplicationState.FINISHED); + when(client.sessionAmProxy.getDAGStatus(isNull(), any())) + .thenReturn(GetDAGStatusResponseProto.newBuilder().setDagStatus(DAGStatusProto.newBuilder() + .addDiagnostics("Diagnostics_0").setState(DAGStatusStateProto.DAG_SUCCEEDED) + .setDAGProgress(ProgressProto.newBuilder() + .setFailedTaskCount(0).setKilledTaskCount(0).setRunningTaskCount(0) + .setSucceededTaskCount(1).setTotalTaskCount(1).build()).build()).build()); + } + @Test (timeout=30000) public void testPreWarmWithTimeout() throws Exception { long startTime = 0 , endTime = 0; @@ -389,8 +515,8 @@ public void testPreWarmWithTimeout() throws Exception { spyClient.mockAppId).getYarnApplicationState()) .thenReturn(YarnApplicationState.RUNNING); when( - spyClient.sessionAmProxy.getAMStatus((RpcController) any(), - (GetAMStatusRequestProto) any())) + spyClient.sessionAmProxy.getAMStatus(any(), + any())) .thenReturn( GetAMStatusResponseProto.newBuilder().setStatus( TezAppMasterStatusProto.INITIALIZING).build()); @@ -405,15 +531,15 @@ public void testPreWarmWithTimeout() throws Exception { endTime = Time.monotonicNow(); assertTrue("Time taken is not as expected", (endTime - startTime) > timeout); - verify(spyClient, times(0)).submitDAG(any(DAG.class)); - Assert.assertTrue("Unexpected Exception message", + verify(spyClient, times(0)).submitDAG(any()); + Assert.assertTrue("Unexpected Exception message: " + te.getMessage(), te.getMessage().contains("Tez AM not ready")); } when( - spyClient.sessionAmProxy.getAMStatus((RpcController) any(), - (GetAMStatusRequestProto) any())) + spyClient.sessionAmProxy.getAMStatus(any(), + any())) .thenReturn( GetAMStatusResponseProto.newBuilder().setStatus( TezAppMasterStatusProto.READY).build()); @@ -423,7 +549,7 @@ public void testPreWarmWithTimeout() throws Exception { endTime = Time.monotonicNow(); assertTrue("Time taken is not as expected", (endTime - startTime) <= timeout); - verify(spyClient, times(1)).submitDAG(any(DAG.class)); + verify(spyClient, times(1)).submitDAG(any()); } 
catch (TezException te) { fail("PreWarm should have succeeded!"); } @@ -458,7 +584,8 @@ public void run() { endTime = Time.monotonicNow(); assertTrue("Time taken is not as expected", (endTime - startTime) <= timeout); - verify(spyClient, times(2)).submitDAG(any(DAG.class)); + verify(spyClient, times(2)).submitDAG(any()); + setClientToReportStoppedDags(client); spyClient.stop(); client.stop(); } @@ -486,7 +613,8 @@ public void testMultipleSubmissionsJob(boolean isSession) throws Exception { LocalResourceVisibility.PUBLIC, 1, 1)); Vertex vertex = Vertex.create("Vertex", ProcessorDescriptor.create("P"), 1, Resource.newInstance(1, 1)).addTaskLocalFiles(lrVertex); - DAG dag = DAG.create("DAG").addVertex(vertex).addTaskLocalFiles(lrDAG); + DAG dag = + DAG.create("DAG-testMultipleSubmissionsJob-session-" + isSession).addVertex(vertex).addTaskLocalFiles(lrDAG); // the dag resource will be added to the vertex once client1.submitDAG(dag); @@ -577,7 +705,7 @@ public void testSubmitDAGAppFailed() throws Exception { Vertex vertex = Vertex.create("Vertex", ProcessorDescriptor.create("P"), 1, Resource.newInstance(1, 1)); - DAG dag = DAG.create("DAG").addVertex(vertex); + DAG dag = DAG.create("DAG-testSubmitDAGAppFailed").addVertex(vertex); try { client.submitDAG(dag); @@ -767,7 +895,7 @@ public void testClientResubmit() throws Exception { Vertex vertex2 = Vertex.create("Vertex2", ProcessorDescriptor.create("P2"), 1, Resource.newInstance(1, 1)); vertex2.setTaskLaunchCmdOpts("-XX:+UseParallelGC -XX:+UseG1GC"); - DAG dag = DAG.create("DAG").addVertex(vertex1).addVertex(vertex2).addTaskLocalFiles(lrDAG); + DAG dag = DAG.create("DAG-testClientResubmit").addVertex(vertex1).addVertex(vertex2).addTaskLocalFiles(lrDAG); for (int i = 0; i < 3; ++i) { try { client.submitDAG(dag); @@ -790,7 +918,7 @@ public void testMissingYarnAppStatus() throws Exception { when(yarnClient.createApplication().getNewApplicationResponse().getApplicationId()).thenReturn(appId1); when(yarnClient.getApplicationReport(appId1)).thenReturn(mockReport); TezYarnClient tezClient = new TezYarnClient(yarnClient); - tezClient.init(new TezConfiguration(false), new YarnConfiguration()); + tezClient.init(new TezConfiguration(false)); try { tezClient.getApplicationReport(appId1); fail("getApplicationReport should have thrown"); @@ -812,8 +940,7 @@ public void testAMClientHeartbeat() throws Exception { Thread.sleep(1000); } client.stop(); - verify(client.sessionAmProxy, atLeast(3)).getAMStatus(any(RpcController.class), - any(GetAMStatusRequestProto.class)); + verify(client.sessionAmProxy, atLeast(3)).getAMStatus(any(), any()); conf.setInt(TezConfiguration.TEZ_AM_CLIENT_HEARTBEAT_TIMEOUT_SECS, -1); final TezClientForTest client2 = configureAndCreateTezClient(conf); @@ -826,10 +953,7 @@ public void testAMClientHeartbeat() throws Exception { Thread.sleep(1000); } client2.stop(); - verify(client2.sessionAmProxy, times(0)).getAMStatus(any(RpcController.class), - any(GetAMStatusRequestProto.class)); - - + verify(client2.sessionAmProxy, times(0)).getAMStatus(any(), any()); } @Test(timeout = 20000) @@ -873,12 +997,31 @@ public void testAMHeartbeatFailOnGetAMStatus() throws Exception { final TezClientForTest client = configureAndCreateTezClient(conf); client.start(); - when(client.sessionAmProxy.getAMStatus(any(RpcController.class), - any(GetAMStatusRequestProto.class))).thenThrow(new ServiceException("error")); + when(client.sessionAmProxy.getAMStatus(any(), any())).thenThrow(new ServiceException("error")); client.callRealGetSessionAMProxy = true; 
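A side note on the mechanical test changes running through this and the surrounding hunks: the deprecated org.mockito.Matchers.any(Class) matchers are replaced by the type-inferred org.mockito.Mockito.any(). A self-contained sketch of the modernized style; the Greeter interface is hypothetical and exists only for illustration.

import static org.mockito.Mockito.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

public class MatcherMigrationSketch {
  // Hypothetical collaborator, only here to demonstrate matcher usage.
  interface Greeter {
    String greet(String name);
  }

  public static void main(String[] args) {
    Greeter greeter = mock(Greeter.class);
    // Mockito 2+ infers the matcher type, so any() replaces the older
    // Matchers.any(String.class) form being removed from these tests.
    when(greeter.greet(any())).thenReturn("hello");
    System.out.println(greeter.greet("tez")); // hello
    verify(greeter).greet(any());
  }
}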
when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState()) .thenReturn(YarnApplicationState.FAILED); Thread.sleep(3 * amHeartBeatTimeoutSecs * 1000); assertTrue(client.getAMKeepAliveService().isTerminated()); } + + //See TEZ-3874 + @Test(timeout = 5000) + public void testYarnZkDeprecatedConf() { + Configuration conf = new Configuration(false); + String val = "hostname:2181"; + conf.set("yarn.resourcemanager.zk-address", val); + + //Test that Exception is not thrown by createFinalConfProtoForApp + TezClientUtils.createFinalConfProtoForApp(conf, null); + } + + @Test + public void testGetAmHostAndPort() throws Exception { + final TezClientForTest client = configureAndCreateTezClient(new TezConfiguration()); + + // TezClient exposes AM host and port from the FrameworkClient (now it's a TezYarnClientForTest) + assertEquals("testhost", client.getAmHost()); + assertEquals(1234, client.getAmPort()); + } } diff --git a/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java b/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java index 7ff8125768..a2f1ce1175 100644 --- a/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java +++ b/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java @@ -26,8 +26,8 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.net.MalformedURLException; import java.net.URL; -import java.net.URLClassLoader; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -47,8 +47,10 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.io.DataInputByteBuffer; +import org.apache.hadoop.io.Text; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -81,6 +83,8 @@ public class TestTezClientUtils { private static String TEST_ROOT_DIR = "target" + Path.SEPARATOR + TestTezClientUtils.class.getName() + "-tmpDir"; + private static final File STAGING_DIR = new File(System.getProperty("test.build.data", "target"), + TestTezClientUtils.class.getName()).getAbsoluteFile(); /** * */ @@ -128,12 +132,29 @@ public void validateSetTezJarLocalResourcesDefinedNonExistingDirectory() throws TezClientUtils.setupTezJarsLocalResources(conf, credentials, resources); } - /** - * - */ - @Test (timeout=10000) + private static List getDirAndFileURL() throws MalformedURLException { + String[] classpaths = System.getProperty("java.class.path") + .split(System.getProperty("path.separator")); + List urls = new ArrayList<>(2); + File lastFile = null; + // Add one file and one directory. 
+ for (String path : classpaths) { + URL url = new URL("file://" + path); + File file = FileUtils.toFile(url); + if (lastFile == null) { + lastFile = file; + urls.add(url); + } else if (lastFile.isDirectory() != file.isDirectory()) { + urls.add(url); + break; + } + } + return urls; + } + + @Test (timeout=20000) public void validateSetTezJarLocalResourcesDefinedExistingDirectory() throws Exception { - URL[] cp = ((URLClassLoader)ClassLoader.getSystemClassLoader()).getURLs(); + List cp = getDirAndFileURL(); StringBuffer buffer = new StringBuffer(); for (URL url : cp) { buffer.append(url.toExternalForm()); @@ -147,22 +168,27 @@ public void validateSetTezJarLocalResourcesDefinedExistingDirectory() throws Exc localizedMap); Assert.assertFalse(usingArchive); Set resourceNames = localizedMap.keySet(); + boolean assertedDir = false; + boolean assertedFile = false; for (URL url : cp) { File file = FileUtils.toFile(url); - if (file.isDirectory()){ + if (file.isDirectory()) { String[] firList = file.list(); for (String fileNme : firList) { File innerFile = new File(file, fileNme); if (!innerFile.isDirectory()){ assertTrue(resourceNames.contains(innerFile.getName())); + assertedDir = true; } // not supporting deep hierarchies } - } - else { + } else { assertTrue(resourceNames.contains(file.getName())); + assertedFile = true; } } + assertTrue(assertedDir); + assertTrue(assertedFile); } /** @@ -171,7 +197,7 @@ public void validateSetTezJarLocalResourcesDefinedExistingDirectory() throws Exc */ @Test (timeout=5000) public void validateSetTezJarLocalResourcesDefinedExistingDirectoryIgnored() throws Exception { - URL[] cp = ((URLClassLoader)ClassLoader.getSystemClassLoader()).getURLs(); + List cp = getDirAndFileURL(); StringBuffer buffer = new StringBuffer(); for (URL url : cp) { buffer.append(url.toExternalForm()); @@ -190,9 +216,9 @@ public void validateSetTezJarLocalResourcesDefinedExistingDirectoryIgnored() thr * * @throws Exception */ - @Test (timeout=5000) + @Test (timeout=20000) public void validateSetTezJarLocalResourcesDefinedExistingDirectoryIgnoredSetToFalse() throws Exception { - URL[] cp = ((URLClassLoader)ClassLoader.getSystemClassLoader()).getURLs(); + List cp = getDirAndFileURL(); StringBuffer buffer = new StringBuffer(); for (URL url : cp) { buffer.append(url.toExternalForm()); @@ -328,6 +354,7 @@ public void validateSetTezJarLocalResourcesMixTarballAndJar() throws Exception { // ApplicationSubmissionContext public void testAppSubmissionContextForPriority() throws Exception { TezConfiguration tezConf = new TezConfiguration(); + tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath()); int testpriority = 999; ApplicationId appId = ApplicationId.newInstance(1000, 1); Credentials credentials = new Credentials(); @@ -378,9 +405,10 @@ public void testSetApplicationTags() { public void testSessionTokenInAmClc() throws IOException, YarnException { TezConfiguration tezConf = new TezConfiguration(); + tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath()); ApplicationId appId = ApplicationId.newInstance(1000, 1); - DAG dag = DAG.create("testdag"); + DAG dag = DAG.create("testdag-testSessionTokenInAmClc"); dag.addVertex(Vertex.create("testVertex", ProcessorDescriptor.create("processorClassname"), 1) .setTaskLaunchCmdOpts("initialLaunchOpts")); @@ -415,12 +443,13 @@ public void testAMLoggingOptsSimple() throws IOException, YarnException { TezConfiguration tezConf = new TezConfiguration(); tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "WARN"); + 
tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath()); ApplicationId appId = ApplicationId.newInstance(1000, 1); Credentials credentials = new Credentials(); JobTokenSecretManager jobTokenSecretManager = new JobTokenSecretManager(); TezClientUtils.createSessionToken(appId.toString(), jobTokenSecretManager, credentials); - DAG dag = DAG.create("testdag"); + DAG dag = DAG.create("DAG-testAMLoggingOptsSimple"); dag.addVertex(Vertex.create("testVertex", ProcessorDescriptor.create("processorClassname"), 1) .setTaskLaunchCmdOpts("initialLaunchOpts")); AMConfiguration amConf = @@ -455,12 +484,13 @@ public void testAMLoggingOptsPerLogger() throws IOException, YarnException { TezConfiguration tezConf = new TezConfiguration(); tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "WARN;org.apache.hadoop.ipc=DEBUG;org.apache.hadoop.security=DEBUG"); + tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath()); ApplicationId appId = ApplicationId.newInstance(1000, 1); Credentials credentials = new Credentials(); JobTokenSecretManager jobTokenSecretManager = new JobTokenSecretManager(); TezClientUtils.createSessionToken(appId.toString(), jobTokenSecretManager, credentials); - DAG dag = DAG.create("testdag"); + DAG dag = DAG.create("DAG-testAMLoggingOptsPerLogger"); dag.addVertex(Vertex.create("testVertex", ProcessorDescriptor.create("processorClassname"), 1) .setTaskLaunchCmdOpts("initialLaunchOpts")); AMConfiguration amConf = @@ -652,6 +682,17 @@ public void testDefaultLoggingJavaOpts() { javaOpts.contains("-Dlog4j.configuratorClass=org.apache.tez.common.TezLog4jConfigurator")); } + @Test + public void testDefaultLoggingJavaOptsWithRootLogger() { + String origJavaOpts = "-D" + TezConstants.TEZ_ROOT_LOGGER_NAME + "=INFO -DtestProperty=value"; + String javaOpts = TezClientUtils.maybeAddDefaultLoggingJavaOpts("FOOBAR", origJavaOpts); + Assert.assertNotNull(javaOpts); + Assert.assertTrue(javaOpts.contains("-D" + TezConstants.TEZ_ROOT_LOGGER_NAME + "=FOOBAR")); + Assert.assertTrue(javaOpts.contains(TezConstants.TEZ_CONTAINER_LOG4J_PROPERTIES_FILE) + && javaOpts.contains("-Dlog4j.configuratorClass=org.apache.tez.common.TezLog4jConfigurator")); + Assert.assertTrue(javaOpts.contains("-DtestProperty=value")); + } + @Test (timeout = 5000) public void testConfSerializationForAm() { Configuration conf =new Configuration(false); @@ -890,5 +931,29 @@ public void testClusterTaskLaunchCmdOptsSetup() throws TezException { } + @Test + public void testSessionCredentialsMergedBeforeAmConfigCredentials() throws Exception { + TezConfiguration conf = new TezConfiguration(); + Text tokenType = new Text("TEST_TOKEN_TYPE"); + Text tokenKind = new Text("TEST_TOKEN_KIND"); + Text tokenService = new Text("TEST_TOKEN_SERVICE"); + + Credentials amConfigCredentials = new Credentials(); + amConfigCredentials.addToken(tokenType, + new Token<>("id1".getBytes(), null, tokenKind, tokenService)); + + Credentials sessionCredentials = new Credentials(); + Token sessionToken = + new Token<>("id2".getBytes(), null, tokenKind, tokenService); + sessionCredentials.addToken(tokenType, sessionToken); + + AMConfiguration amConfig = new AMConfiguration(conf, null, amConfigCredentials); + Credentials amLaunchCredentials = + TezClientUtils.prepareAmLaunchCredentials(amConfig, sessionCredentials, conf, null); + + // if there is another token in am conf creds of the same token type, + // session token should be applied while creating ContainerLaunchContext + Assert.assertEquals(sessionToken, 
amLaunchCredentials.getToken(tokenType)); + } } diff --git a/tez-api/src/test/java/org/apache/tez/common/TestReflectionUtils.java b/tez-api/src/test/java/org/apache/tez/common/TestReflectionUtils.java index 2fbd35cb3d..ed3814d000 100644 --- a/tez-api/src/test/java/org/apache/tez/common/TestReflectionUtils.java +++ b/tez-api/src/test/java/org/apache/tez/common/TestReflectionUtils.java @@ -58,7 +58,7 @@ public void testConstructorWithParameters() throws TezReflectionException @Test(timeout = 5000) public void testAddResourceToClasspath() throws IOException, TezException { - + TezClassLoader.setupTezClassLoader(); String rsrcName = "dummyfile.xml"; FileSystem localFs = FileSystem.getLocal(new Configuration()); Path p = new Path(rsrcName); @@ -78,7 +78,7 @@ public void testAddResourceToClasspath() throws IOException, TezException { urlForm = urlForm.substring(0, urlForm.lastIndexOf('/') + 1); URL url = new URL(urlForm); - ReflectionUtils.addResourcesToClasspath(Collections.singletonList(url)); + ReflectionUtils.addResourcesToSystemClassLoader(Collections.singletonList(url)); loadedUrl = Thread.currentThread().getContextClassLoader().getResource(rsrcName); diff --git a/tez-api/src/test/java/org/apache/tez/common/TestTezCommonUtils.java b/tez-api/src/test/java/org/apache/tez/common/TestTezCommonUtils.java index 3929c4bf5a..e1ae6cd2cb 100644 --- a/tez-api/src/test/java/org/apache/tez/common/TestTezCommonUtils.java +++ b/tez-api/src/test/java/org/apache/tez/common/TestTezCommonUtils.java @@ -18,6 +18,7 @@ package org.apache.tez.common; +import java.io.File; import java.io.IOException; import java.util.Map; @@ -25,6 +26,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -43,8 +45,11 @@ public class TestTezCommonUtils { private static final String STAGE_DIR = "/tmp/mystage"; + + private static final File LOCAL_STAGING_DIR = new File(System.getProperty("test.build.data"), + TestTezCommonUtils.class.getSimpleName()).getAbsoluteFile(); private static String RESOLVED_STAGE_DIR; - private static Configuration conf = new Configuration();; + private static Configuration conf = new Configuration(); private static String TEST_ROOT_DIR = "target" + Path.SEPARATOR + TestTezCommonUtils.class.getName() + "-tmpDir"; private static MiniDFSCluster dfsCluster = null; @@ -84,10 +89,10 @@ public static void afterClass() throws InterruptedException { public void testTezBaseStagingPath() throws Exception { Configuration localConf = new Configuration(); // Check if default works with localFS - localConf.unset(TezConfiguration.TEZ_AM_STAGING_DIR); + localConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, LOCAL_STAGING_DIR.getAbsolutePath()); localConf.set("fs.defaultFS", "file:///"); Path stageDir = TezCommonUtils.getTezBaseStagingPath(localConf); - Assert.assertEquals(stageDir.toString(), "file:" + TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT); + Assert.assertEquals("file:" + LOCAL_STAGING_DIR, stageDir.toString()); // check if user set something, indeed works conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGE_DIR); @@ -409,4 +414,17 @@ public void testGetDAGSessionTimeout() { } + @Test + public void testMkDirForAM() throws IOException { + Configuration remoteConf = new Configuration(); + remoteConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR); + 
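On the token-precedence behavior asserted by testSessionCredentialsMergedBeforeAmConfigCredentials above: a self-contained sketch of the rule, under the assumption that the merge behaves like Hadoop's Credentials.addAll, which overwrites tokens on alias collision so that whichever credential set is added last wins. This illustrates the precedence rule only; it is not the TezClientUtils code path.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;

public class CredentialPrecedenceSketch {
  public static void main(String[] args) {
    Text alias = new Text("TEST_TOKEN_TYPE");
    Text kind = new Text("TEST_TOKEN_KIND");
    Text service = new Text("TEST_TOKEN_SERVICE");

    Credentials amConfigCredentials = new Credentials();
    amConfigCredentials.addToken(alias,
        new Token<TokenIdentifier>("id1".getBytes(), new byte[0], kind, service));

    Credentials sessionCredentials = new Credentials();
    Token<TokenIdentifier> sessionToken =
        new Token<>("id2".getBytes(), new byte[0], kind, service);
    sessionCredentials.addToken(alias, sessionToken);

    // addAll overwrites entries that share an alias, so adding the session
    // credentials after the AM config credentials makes the session token win.
    Credentials amLaunchCredentials = new Credentials();
    amLaunchCredentials.addAll(amConfigCredentials);
    amLaunchCredentials.addAll(sessionCredentials);

    System.out.println(amLaunchCredentials.getToken(alias) == sessionToken); // true
  }
}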
remoteConf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "777"); + MiniDFSCluster miniDFS = new MiniDFSCluster.Builder(remoteConf).numDataNodes(3).format(true).racks(null) + .build(); + FileSystem remoteFileSystem = miniDFS.getFileSystem(); + Path path = new Path(TEST_ROOT_DIR + "/testMkDirForAM"); + TezCommonUtils.mkDirForAM(remoteFileSystem, path); + Assert.assertEquals(TezCommonUtils.TEZ_AM_DIR_PERMISSION, remoteFileSystem.getFileStatus(path).getPermission()); + miniDFS.shutdown(); + } } diff --git a/tez-api/src/test/java/org/apache/tez/common/TestVersionInfo.java b/tez-api/src/test/java/org/apache/tez/common/TestVersionInfo.java index 17ff3d1655..67f97b86a5 100644 --- a/tez-api/src/test/java/org/apache/tez/common/TestVersionInfo.java +++ b/tez-api/src/test/java/org/apache/tez/common/TestVersionInfo.java @@ -18,44 +18,41 @@ package org.apache.tez.common; -import java.io.IOException; - import org.junit.Assert; import org.junit.Test; - public class TestVersionInfo { - final String version = "0.6.0-SNAPSHOT"; - final String revision = "d523db65804a5742ce50824e6fcfb8a04d184c0d"; - final String buildTime = "20141024-1052"; - final String scmUrl = "scm:git:https://git-wip-us.apache.org/repos/asf/tez.git"; + private static final String VERSION = "0.6.0-SNAPSHOT"; + private static final String REVISION = "d523db65804a5742ce50824e6fcfb8a04d184c0d"; + private static final String BUILD_TIME = "20141024-1052"; + private static final String SCM_URL = "scm:git:https://gitbox.apache.org/repos/asf/tez.git"; @Test(timeout = 5000) public void testTest1File() { VersionInfo versionInfo = new VersionInfo("test1"); - Assert.assertEquals(version, versionInfo.getVersion()); - Assert.assertEquals(revision, versionInfo.getRevision()); - Assert.assertEquals(buildTime, versionInfo.getBuildTime()); - Assert.assertEquals(scmUrl, versionInfo.getSCMURL()); + Assert.assertEquals(VERSION, versionInfo.getVersion()); + Assert.assertEquals(REVISION, versionInfo.getRevision()); + Assert.assertEquals(BUILD_TIME, versionInfo.getBuildTime()); + Assert.assertEquals(SCM_URL, versionInfo.getSCMURL()); } @Test(timeout = 5000) public void testTest2File() { VersionInfo versionInfo = new VersionInfo("test2"); - Assert.assertEquals(version, versionInfo.getVersion()); - Assert.assertEquals(revision, versionInfo.getRevision()); - Assert.assertEquals(buildTime, versionInfo.getBuildTime()); + Assert.assertEquals(VERSION, versionInfo.getVersion()); + Assert.assertEquals(REVISION, versionInfo.getRevision()); + Assert.assertEquals(BUILD_TIME, versionInfo.getBuildTime()); Assert.assertEquals(VersionInfo.UNKNOWN, versionInfo.getSCMURL()); } @Test(timeout = 5000) public void testTest3File() { VersionInfo versionInfo = new VersionInfo("test3"); - Assert.assertEquals(version, versionInfo.getVersion()); - Assert.assertEquals(revision, versionInfo.getRevision()); + Assert.assertEquals(VERSION, versionInfo.getVersion()); + Assert.assertEquals(REVISION, versionInfo.getRevision()); Assert.assertEquals("", versionInfo.getBuildTime()); - Assert.assertEquals(scmUrl, versionInfo.getSCMURL()); + Assert.assertEquals(SCM_URL, versionInfo.getSCMURL()); } @Test(timeout = 5000) diff --git a/tez-api/src/test/java/org/apache/tez/common/security/TestTokenCache.java b/tez-api/src/test/java/org/apache/tez/common/security/TestTokenCache.java index 59488b6fb8..4c144174c7 100644 --- a/tez-api/src/test/java/org/apache/tez/common/security/TestTokenCache.java +++ b/tez-api/src/test/java/org/apache/tez/common/security/TestTokenCache.java @@ -18,7 +18,7 @@ package 
org.apache.tez.common.security; -import static org.mockito.Matchers.any; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -113,6 +113,11 @@ public void testObtainTokensForFileSystems() throws Exception { conf.setBoolean("fs.test.impl.disable.cache", true); TokenCache.obtainTokensForFileSystemsInternal(creds, paths, conf); verify(TestFileSystem.fs, times(paths.length + 1)).addDelegationTokens(renewer, creds); + + // Excluded filesystem tokens should not be obtained. + conf.set("tez.job.fs-servers.token-renewal.exclude", "dir"); + TokenCache.obtainTokensForFileSystemsInternal(creds, paths, conf); + verify(TestFileSystem.fs, times(paths.length + 1)).addDelegationTokens(renewer, creds); } private Path[] makePaths(int count, String prefix) throws Exception { @@ -127,7 +132,7 @@ public static class TestFileSystem extends FilterFileSystem { static final FileSystem fs = mock(FileSystem.class); static { try { - when(fs.getUri()).thenReturn(new URI("test:///")); + when(fs.getUri()).thenReturn(new URI("test://dir")); } catch (URISyntaxException e) { throw new RuntimeException(e); } @@ -148,7 +153,7 @@ private MockFileSystem createFileSystemForServiceName(final String service) throws IOException { MockFileSystem mockFs = new MockFileSystem(); when(mockFs.getCanonicalServiceName()).thenReturn(service); - when(mockFs.getDelegationToken(any(String.class))).thenAnswer( + when(mockFs.getDelegationToken(any())).thenAnswer( new Answer>() { int unique = 0; @Override diff --git a/tez-api/src/test/java/org/apache/tez/dag/api/TestDAG.java b/tez-api/src/test/java/org/apache/tez/dag/api/TestDAG.java index 05c4e30cf0..5ec57c4148 100644 --- a/tez-api/src/test/java/org/apache/tez/dag/api/TestDAG.java +++ b/tez-api/src/test/java/org/apache/tez/dag/api/TestDAG.java @@ -47,7 +47,7 @@ public void testDuplicatedVertices() { dummyTaskCount, dummyTaskResource); Vertex v2 = Vertex.create("v1", ProcessorDescriptor.create("Processor"), dummyTaskCount, dummyTaskResource); - DAG dag = DAG.create("testDAG"); + DAG dag = DAG.create("DAG-testDuplicatedVertices"); dag.addVertex(v1); try { dag.addVertex(v2); @@ -74,7 +74,7 @@ public void testDuplicatedEdges() { SchedulingType.CONCURRENT, OutputDescriptor.create("output"), InputDescriptor.create("input"))); - DAG dag = DAG.create("testDAG"); + DAG dag = DAG.create("DAG-testDuplicatedEdges"); dag.addVertex(v1); dag.addVertex(v2); dag.addEdge(edge1); @@ -96,7 +96,7 @@ public void testDuplicatedVertexGroup() { Vertex v3 = Vertex.create("v3", ProcessorDescriptor.create("Processor"), dummyTaskCount, dummyTaskResource); - DAG dag = DAG.create("testDAG"); + DAG dag = DAG.create("DAG-testDuplicatedVertexGroup"); dag.createVertexGroup("group_1", v1, v2); try { @@ -123,7 +123,7 @@ public void testDuplicatedGroupInputEdge() { ProcessorDescriptor.create("Processor"), dummyTaskCount, dummyTaskResource); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testDuplicatedGroupInputEdge"); String groupName1 = "uv12"; VertexGroup uv12 = dag.createVertexGroup(groupName1, v1, v2); @@ -156,7 +156,7 @@ public void testDuplicatedGroupInputEdge() { @Test(timeout = 5000) public void testDAGConf() { - DAG dag = DAG.create("dag1"); + DAG dag = DAG.create("DAG-testDAGConf"); // it's OK to set custom configuration dag.setConf("unknown_conf", "value"); @@ -281,7 +281,7 @@ public void testDuplicatedOutput_1() { @Test(timeout = 5000) public void testDuplicatedOutput_2() { - DAG dag = 
DAG.create("dag1"); + DAG dag = DAG.create("DAG-testDuplicatedOutput_2"); Vertex v1 = Vertex.create("v1", ProcessorDescriptor.create("dummyProcessor")); DataSinkDescriptor dataSink = DataSinkDescriptor.create(OutputDescriptor.create("dummyOutput"), null, null); @@ -354,7 +354,7 @@ public void testRecreateDAG() { Resource.newInstance(1, 1)); Vertex v2 = Vertex.create("v2", ProcessorDescriptor.create("dummyProcessor2"), 1, Resource.newInstance(1, 1)); - DAG dag = DAG.create("dag1").addVertex(v1).addVertex(v2).addTaskLocalFiles(lrDAG); + DAG dag = DAG.create("DAG-testRecreateDAG").addVertex(v1).addVertex(v2).addTaskLocalFiles(lrDAG); TezConfiguration tezConf = new TezConfiguration(); DAGPlan firstPlan = dag.createDag(tezConf, null, null, null, false); @@ -375,7 +375,7 @@ public void testCreateDAGForHistoryLogLevel() { Resource.newInstance(1, 1)); Vertex v2 = Vertex.create("v2", ProcessorDescriptor.create("dummyProcessor2"), 1, Resource.newInstance(1, 1)); - DAG dag = DAG.create("dag1").addVertex(v1).addVertex(v2).addTaskLocalFiles(lrDAG); + DAG dag = DAG.create("DAG-testCreateDAGForHistoryLogLevel").addVertex(v1).addVertex(v2).addTaskLocalFiles(lrDAG); TezConfiguration tezConf = new TezConfiguration(); diff --git a/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGPlan.java b/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGPlan.java index 8e1011feb7..d5f6b0af9c 100644 --- a/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGPlan.java +++ b/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGPlan.java @@ -107,7 +107,7 @@ public void testBasicJobPlanSerde() throws IOException { @Test(timeout = 5000) public void testEdgeManagerSerde() { - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testEdgeManagerSerde"); ProcessorDescriptor pd1 = ProcessorDescriptor.create("processor1") .setUserPayload(UserPayload.create(ByteBuffer.wrap("processor1Bytes".getBytes()))); ProcessorDescriptor pd2 = ProcessorDescriptor.create("processor2") @@ -144,7 +144,7 @@ public void testEdgeManagerSerde() { @Test(timeout = 5000) public void testUserPayloadSerde() { - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testUserPayloadSerde"); ProcessorDescriptor pd1 = ProcessorDescriptor.create("processor1"). setUserPayload(UserPayload.create(ByteBuffer.wrap("processor1Bytes".getBytes()))); ProcessorDescriptor pd2 = ProcessorDescriptor.create("processor2"). @@ -205,7 +205,7 @@ public void testUserPayloadSerde() { @Test(timeout = 5000) public void userVertexOrderingIsMaintained() { - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-userVertexOrderingIsMaintained"); ProcessorDescriptor pd1 = ProcessorDescriptor.create("processor1"). setUserPayload(UserPayload.create(ByteBuffer.wrap("processor1Bytes".getBytes()))); ProcessorDescriptor pd2 = ProcessorDescriptor.create("processor2"). @@ -278,7 +278,7 @@ public void userVertexOrderingIsMaintained() { @Test (timeout=5000) public void testCredentialsSerde() { - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testCredentialsSerde"); ProcessorDescriptor pd1 = ProcessorDescriptor.create("processor1"). setUserPayload(UserPayload.create(ByteBuffer.wrap("processor1Bytes".getBytes()))); ProcessorDescriptor pd2 = ProcessorDescriptor.create("processor2"). 
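
A note on the renames in this hunk and the ones that follow: each test now creates a DAG whose name embeds the test method (for example DAG-testCredentialsSerde), so a leaked or failing DAG in the logs points straight at its test. As a rough sketch (not part of the patch; DagNamingSketch and testSomething are made-up names), the same convention could be derived from JUnit 4's TestName rule instead of being maintained by hand:

    import org.junit.Rule;
    import org.junit.Test;
    import org.junit.rules.TestName;

    public class DagNamingSketch {
      @Rule
      public TestName name = new TestName();

      @Test
      public void testSomething() {
        // yields "DAG-testSomething", matching the hand-written names above
        String dagName = "DAG-" + name.getMethodName();
        // DAG dag = DAG.create(dagName);
      }
    }
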
@@ -322,7 +322,7 @@ public void testCredentialsSerde() { @Test(timeout = 5000) public void testInvalidExecContext_1() { - DAG dag = DAG.create("dag1"); + DAG dag = DAG.create("DAG-testInvalidExecContext_1"); dag.setExecutionContext(VertexExecutionContext.createExecuteInAm(true)); Vertex v1 = Vertex.create("testvertex", ProcessorDescriptor.create("processor1"), 1); dag.addVertex(v1); @@ -364,7 +364,7 @@ public void testInvalidExecContext_2() { VertexExecutionContext.create("plugin", "plugin", "invalidplugin"); - DAG dag = DAG.create("dag1"); + DAG dag = DAG.create("DAG-testInvalidExecContext_2"); dag.setExecutionContext(VertexExecutionContext.createExecuteInContainers(true)); Vertex v1 = Vertex.create("testvertex", ProcessorDescriptor.create("processor1"), 1); dag.addVertex(v1); @@ -429,7 +429,7 @@ public void testInvalidExecContext_2() { @Test(timeout = 5000) public void testServiceDescriptorPropagation() { - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testServiceDescriptorPropagation"); ProcessorDescriptor pd1 = ProcessorDescriptor.create("processor1"). setUserPayload(UserPayload.create(ByteBuffer.wrap("processor1Bytes".getBytes()))); ProcessorDescriptor pd2 = ProcessorDescriptor.create("processor2"). @@ -492,7 +492,7 @@ public void testServiceDescriptorPropagation() { @Test(timeout = 5000) public void testInvalidJavaOpts() { - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testInvalidJavaOpts"); ProcessorDescriptor pd1 = ProcessorDescriptor.create("processor1") .setUserPayload(UserPayload.create(ByteBuffer.wrap("processor1Bytes".getBytes()))); Vertex v1 = Vertex.create("v1", pd1, 10, Resource.newInstance(1024, 1)); diff --git a/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java b/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java index e3c40aaef0..1ba877769f 100644 --- a/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java +++ b/tez-api/src/test/java/org/apache/tez/dag/api/TestDAGVerify.java @@ -73,7 +73,7 @@ public void testVerifyScatterGather() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(dummyOutputClassName), InputDescriptor.create(dummyInputClassName))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testVerifyScatterGather"); dag.addVertex(v1); dag.addVertex(v2); dag.addEdge(e1); @@ -94,7 +94,7 @@ public void testVerifyCustomEdge() { SchedulingType.SEQUENTIAL, OutputDescriptor.create(dummyOutputClassName), InputDescriptor.create(dummyInputClassName))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testVerifyCustomEdge"); dag.addVertex(v1); dag.addVertex(v2); dag.addEdge(e1); @@ -114,13 +114,13 @@ public void testVerifyOneToOne() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(dummyOutputClassName), InputDescriptor.create(dummyInputClassName))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testVerifyOneToOne"); dag.addVertex(v1); dag.addVertex(v2); dag.addEdge(e1); dag.verify(); } - + @Test(timeout = 5000) // v1 (known) -> v2 (-1) -> v3 (-1) public void testVerifyOneToOneInferParallelism() { @@ -143,7 +143,7 @@ public void testVerifyOneToOneInferParallelism() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(dummyOutputClassName), InputDescriptor.create(dummyInputClassName))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testVerifyOneToOneInferParallelism"); dag.addVertex(v1); dag.addVertex(v2); dag.addVertex(v3); @@ -153,7 +153,7 @@ public void 
testVerifyOneToOneInferParallelism() { Assert.assertEquals(dummyTaskCount, v2.getParallelism()); Assert.assertEquals(dummyTaskCount, v3.getParallelism()); } - + @Test(timeout = 5000) // v1 (known) -> v2 (-1) -> v3 (-1) // The test checks resiliency to ordering of the vertices/edges @@ -177,7 +177,7 @@ public void testVerifyOneToOneInferParallelismReverseOrder() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(dummyOutputClassName), InputDescriptor.create(dummyInputClassName))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testVerifyOneToOneInferParallelismReverseOrder"); dag.addVertex(v3); dag.addVertex(v1); dag.addVertex(v2); @@ -187,7 +187,7 @@ public void testVerifyOneToOneInferParallelismReverseOrder() { Assert.assertEquals(dummyTaskCount, v2.getParallelism()); Assert.assertEquals(dummyTaskCount, v3.getParallelism()); } - + @Test(timeout = 5000) public void testVerifyOneToOneNoInferParallelism() { Vertex v1 = Vertex.create("v1", @@ -204,14 +204,14 @@ public void testVerifyOneToOneNoInferParallelism() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(dummyOutputClassName), InputDescriptor.create(dummyInputClassName))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testVerifyOneToOneNoInferParallelism"); dag.addVertex(v1); dag.addVertex(v2); dag.addEdge(e1); dag.verify(); Assert.assertEquals(-1, v2.getParallelism()); } - + @Test(timeout = 5000) // v1 (-1) -> v2 (known) -> v3 (-1) public void testVerifyOneToOneIncorrectParallelism1() { @@ -234,7 +234,7 @@ public void testVerifyOneToOneIncorrectParallelism1() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(dummyOutputClassName), InputDescriptor.create(dummyInputClassName))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testVerifyOneToOneIncorrectParallelism1"); dag.addVertex(v1); dag.addVertex(v2); dag.addVertex(v3); @@ -280,7 +280,7 @@ public void testVerifyOneToOneIncorrectParallelism2() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(dummyOutputClassName), InputDescriptor.create(dummyInputClassName))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testVerifyOneToOneIncorrectParallelism2"); dag.addVertex(v1); dag.addVertex(v2); dag.addVertex(v3); @@ -290,13 +290,13 @@ public void testVerifyOneToOneIncorrectParallelism2() { dag.addEdge(e3); try { dag.verify(); - Assert.assertTrue(false); + Assert.fail(); } catch (TezUncheckedException e) { Assert.assertTrue(e.getMessage().contains( "1-1 Edge. 
Destination vertex parallelism must match source vertex")); } } - + @Test(timeout = 5000) public void testVerifyBroadcast() { Vertex v1 = Vertex.create("v1", @@ -310,14 +310,14 @@ public void testVerifyBroadcast() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create(dummyOutputClassName), InputDescriptor.create(dummyInputClassName))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testVerifyBroadcast"); dag.addVertex(v1); dag.addVertex(v2); dag.addEdge(e1); dag.verify(); } - @Test(expected = IllegalStateException.class, timeout = 5000) + @Test(timeout = 5000) public void testVerify3() { Vertex v1 = Vertex.create("v1", ProcessorDescriptor.create(dummyProcessorClassName), @@ -330,14 +330,14 @@ public void testVerify3() { DataSourceType.EPHEMERAL, SchedulingType.SEQUENTIAL, OutputDescriptor.create(dummyOutputClassName), InputDescriptor.create(dummyInputClassName))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testVerify3"); dag.addVertex(v1); dag.addVertex(v2); dag.addEdge(e1); dag.verify(); } - @Test(expected = IllegalStateException.class, timeout = 5000) + @Test(timeout = 5000) public void testVerify4() { Vertex v1 = Vertex.create("v1", ProcessorDescriptor.create(dummyProcessorClassName), @@ -350,7 +350,7 @@ public void testVerify4() { DataSourceType.EPHEMERAL, SchedulingType.CONCURRENT, OutputDescriptor.create(dummyOutputClassName), InputDescriptor.create(dummyInputClassName))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testVerify4"); dag.addVertex(v1); dag.addVertex(v2); dag.addEdge(e1); @@ -397,7 +397,7 @@ public void testCycle1() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("dummy output class"), InputDescriptor.create("dummy input class"))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testCycle1"); dag.addVertex(v1); dag.addVertex(v2); dag.addVertex(v3); @@ -457,7 +457,7 @@ public void testCycle2() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("dummy output class"), InputDescriptor.create("dummy input class"))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testCycle2"); dag.addVertex(v1); dag.addVertex(v2); dag.addVertex(v3); @@ -489,7 +489,7 @@ public void testSelfCycle(){ DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("dummy output class"), InputDescriptor.create("dummy input class"))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testSelfCycle"); dag.addVertex(v1); dag.addEdge(e1); try{ @@ -513,7 +513,7 @@ public void repeatedVertexName() { ProcessorDescriptor.create("MapProcessor"), dummyTaskCount, dummyTaskResource); try { - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-repeatedVertexName"); dag.addVertex(v1); dag.addVertex(v1repeat); dag.verify(); @@ -525,7 +525,7 @@ public void repeatedVertexName() { System.out.println(ex.getMessage()); Assert.assertTrue(ex.getMessage().startsWith("Vertex v1 already defined")); } - + @Test(expected = IllegalStateException.class, timeout = 5000) public void testInputAndInputVertexNameCollision() { Vertex v1 = Vertex.create("v1", @@ -534,22 +534,22 @@ public void testInputAndInputVertexNameCollision() { Vertex v2 = Vertex.create("v2", ProcessorDescriptor.create("MapProcessor"), dummyTaskCount, dummyTaskResource); - + v2.addDataSource("v1", DataSourceDescriptor.create(null, null, null)); - + Edge e1 = Edge.create(v1, v2, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, 
SchedulingType.SEQUENTIAL, OutputDescriptor.create("dummy output class"), InputDescriptor.create("dummy input class"))); - - DAG dag = DAG.create("testDag"); + + DAG dag = DAG.create("DAG-testInputAndInputVertexNameCollision"); dag.addVertex(v1); dag.addVertex(v2); dag.addEdge(e1); dag.verify(); } - + @Test(expected = IllegalStateException.class, timeout = 5000) public void testOutputAndOutputVertexNameCollision() { Vertex v1 = Vertex.create("v1", @@ -558,22 +558,22 @@ public void testOutputAndOutputVertexNameCollision() { Vertex v2 = Vertex.create("v2", ProcessorDescriptor.create("MapProcessor"), dummyTaskCount, dummyTaskResource); - + v1.addDataSink("v2", DataSinkDescriptor.create(null, null, null)); - + Edge e1 = Edge.create(v1, v2, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("dummy output class"), InputDescriptor.create("dummy input class"))); - - DAG dag = DAG.create("testDag"); + + DAG dag = DAG.create("DAG-testOutputAndOutputVertexNameCollision"); dag.addVertex(v1); dag.addVertex(v2); dag.addEdge(e1); dag.verify(); } - + @Test(expected = IllegalStateException.class, timeout = 5000) public void testOutputAndVertexNameCollision() { Vertex v1 = Vertex.create("v1", @@ -582,15 +582,15 @@ public void testOutputAndVertexNameCollision() { Vertex v2 = Vertex.create("v2", ProcessorDescriptor.create("MapProcessor"), dummyTaskCount, dummyTaskResource); - + v1.addDataSink("v2", DataSinkDescriptor.create(null, null, null)); - - DAG dag = DAG.create("testDag"); + + DAG dag = DAG.create("DAG-testOutputAndVertexNameCollision"); dag.addVertex(v1); dag.addVertex(v2); dag.verify(); } - + @Test(expected = IllegalStateException.class, timeout = 5000) public void testInputAndVertexNameCollision() { Vertex v1 = Vertex.create("v1", @@ -599,10 +599,10 @@ public void testInputAndVertexNameCollision() { Vertex v2 = Vertex.create("v2", ProcessorDescriptor.create("MapProcessor"), dummyTaskCount, dummyTaskResource); - + v1.addDataSource("v2", DataSourceDescriptor.create(null, null, null)); - - DAG dag = DAG.create("testDag"); + + DAG dag = DAG.create("DAG-testInputAndVertexNameCollision"); dag.addVertex(v1); dag.addVertex(v2); dag.verify(); @@ -632,7 +632,7 @@ public void BinaryInputAllowed() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("dummy output class"), InputDescriptor.create("dummy input class"))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-BinaryInputAllowed"); dag.addVertex(v1); dag.addVertex(v2); dag.addVertex(v3); @@ -640,7 +640,7 @@ public void BinaryInputAllowed() { dag.addEdge(e2); dag.verify(); } - + @Test(timeout = 5000) public void testVertexGroupWithMultipleOutputEdges() { Vertex v1 = Vertex.create("v1", @@ -655,19 +655,19 @@ public void testVertexGroupWithMultipleOutputEdges() { Vertex v4 = Vertex.create("v4", ProcessorDescriptor.create("Processor"), dummyTaskCount, dummyTaskResource); - - DAG dag = DAG.create("testDag"); + + DAG dag = DAG.create("DAG-testVertexGroupWithMultipleOutputEdges"); VertexGroup uv12 = dag.createVertexGroup("uv12", v1, v2); OutputDescriptor outDesc = new OutputDescriptor(); uv12.addDataSink("uvOut", DataSinkDescriptor.create(outDesc, null, null)); - + GroupInputEdge e1 = GroupInputEdge.create(uv12, v3, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("dummy output class"), InputDescriptor.create("dummy input class")), InputDescriptor.create("dummy 
input class")); - + GroupInputEdge e2 = GroupInputEdge.create(uv12, v4, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, @@ -685,7 +685,7 @@ public void testVertexGroupWithMultipleOutputEdges() { for (int i = 0; i< 10;++i){ dag.verify(); // should be OK when called multiple times } - + Assert.assertEquals(2, v1.getOutputVertices().size()); Assert.assertEquals(2, v2.getOutputVertices().size()); Assert.assertTrue(v1.getOutputVertices().contains(v3)); @@ -693,7 +693,7 @@ public void testVertexGroupWithMultipleOutputEdges() { Assert.assertTrue(v2.getOutputVertices().contains(v3)); Assert.assertTrue(v2.getOutputVertices().contains(v4)); } - + @Test(timeout = 5000) public void testVertexGroup() { Vertex v1 = Vertex.create("v1", @@ -711,16 +711,16 @@ public void testVertexGroup() { Vertex v5 = Vertex.create("v5", ProcessorDescriptor.create("Processor"), dummyTaskCount, dummyTaskResource); - - DAG dag = DAG.create("testDag"); + + DAG dag = DAG.create("DAG-testVertexGroup"); String groupName1 = "uv12"; VertexGroup uv12 = dag.createVertexGroup(groupName1, v1, v2); OutputDescriptor outDesc = new OutputDescriptor(); uv12.addDataSink("uvOut", DataSinkDescriptor.create(outDesc, null, null)); - + String groupName2 = "uv23"; VertexGroup uv23 = dag.createVertexGroup(groupName2, v2, v3); - + GroupInputEdge e1 = GroupInputEdge.create(uv12, v4, EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, @@ -733,7 +733,7 @@ public void testVertexGroup() { OutputDescriptor.create("dummy output class"), InputDescriptor.create("dummy input class")), InputDescriptor.create("dummy input class")); - + dag.addVertex(v1); dag.addVertex(v2); dag.addVertex(v3); @@ -744,7 +744,7 @@ public void testVertexGroup() { for (int i = 0; i< 10;++i){ dag.verify(); // should be OK when called multiple times } - + // for the first Group v1 and v2 should get connected to v4 and also have 1 output // for the second Group v2 and v3 should get connected to v5 // the Group place holders should disappear @@ -775,7 +775,7 @@ public void testVertexGroup() { Assert.assertTrue(v5.getGroupInputs().containsKey(groupName2)); Assert.assertEquals(2, dag.vertexGroups.size()); } - + @Test(timeout = 5000) public void testVertexGroupOneToOne() { Vertex v1 = Vertex.create("v1", @@ -793,16 +793,16 @@ public void testVertexGroupOneToOne() { Vertex v5 = Vertex.create("v5", ProcessorDescriptor.create("Processor"), -1, dummyTaskResource); - - DAG dag = DAG.create("testDag"); + + DAG dag = DAG.create("DAG-testVertexGroupOneToOne"); String groupName1 = "uv12"; VertexGroup uv12 = dag.createVertexGroup(groupName1, v1, v2); OutputDescriptor outDesc = new OutputDescriptor(); uv12.addDataSink("uvOut", DataSinkDescriptor.create(outDesc, null, null)); - + String groupName2 = "uv23"; VertexGroup uv23 = dag.createVertexGroup(groupName2, v2, v3); - + GroupInputEdge e1 = GroupInputEdge.create(uv12, v4, EdgeProperty.create(DataMovementType.ONE_TO_ONE, DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, @@ -815,7 +815,7 @@ public void testVertexGroupOneToOne() { OutputDescriptor.create("dummy output class"), InputDescriptor.create("dummy input class")), InputDescriptor.create("dummy input class")); - + dag.addVertex(v1); dag.addVertex(v2); dag.addVertex(v3); @@ -826,7 +826,7 @@ public void testVertexGroupOneToOne() { for (int i = 0; i< 10;++i){ dag.verify(); // should be OK when called multiple times } - + Assert.assertEquals(dummyTaskCount, v5.getParallelism()); } @@ 
-856,7 +856,7 @@ public void BinaryOutput() { DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, OutputDescriptor.create("dummy output class"), InputDescriptor.create("dummy input class"))); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-BinaryOutput"); dag.addVertex(v1); dag.addVertex(v2); dag.addVertex(v3); @@ -874,7 +874,7 @@ public void BinaryOutput() { public void testDagWithNoVertices() { IllegalStateException ex=null; try { - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testDagWithNoVertices"); dag.verify(); } catch (IllegalStateException e){ @@ -921,7 +921,7 @@ public void testInvalidVertexConstruction() { @Test(timeout = 5000) public void testMultipleRootInputsAllowed() { - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testMultipleRootInputsAllowed"); ProcessorDescriptor pd1 = ProcessorDescriptor.create("processor1") .setUserPayload(UserPayload.create(ByteBuffer.wrap("processor1Bytes".getBytes()))); Vertex v1 = Vertex.create("v1", pd1, 10, Resource.newInstance(1024, 1)); @@ -941,8 +941,8 @@ public void testMultipleRootInputsAllowed() { dag.createDag(new TezConfiguration(), null, null, null, true); } - - + + @Test(timeout = 5000) public void testDAGCreateDataInference() { Vertex v1 = Vertex.create("v1", ProcessorDescriptor.create(dummyProcessorClassName)); @@ -954,7 +954,7 @@ public void testDAGCreateDataInference() { String lrName2 = "LR2"; lrs2.put(lrName2, LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test1"), LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1)); - + Set hosts = Sets.newHashSet(); hosts.add("h1"); hosts.add("h2"); @@ -962,11 +962,11 @@ public void testDAGCreateDataInference() { taskLocationHints.add(TaskLocationHint.createTaskLocationHint(hosts, null)); taskLocationHints.add(TaskLocationHint.createTaskLocationHint(hosts, null)); VertexLocationHint vLoc = VertexLocationHint.create(taskLocationHints); - DataSourceDescriptor ds = DataSourceDescriptor.create(InputDescriptor.create("I.class"), + DataSourceDescriptor ds = DataSourceDescriptor.create(InputDescriptor.create("I.class"), InputInitializerDescriptor.create(dummyInputInitClassName), dummyTaskCount, null, vLoc, lrs2); v1.addDataSource("i1", ds); - - DAG dag = DAG.create("testDag"); + + DAG dag = DAG.create("DAG-testDAGCreateDataInference"); dag.addVertex(v1); dag.addTaskLocalFiles(lrs1); DAGPlan dagPlan = dag.createDag(new TezConfiguration(), null, null, null, true); @@ -1003,11 +1003,11 @@ public void testInferredFilesFail() { Assert.assertTrue(e.getMessage().contains("Duplicate Resources found with different size")); } - DataSourceDescriptor ds = DataSourceDescriptor.create(InputDescriptor.create("I.class"), + DataSourceDescriptor ds = DataSourceDescriptor.create(InputDescriptor.create("I.class"), null, -1, null, null, lrs2); v1.addDataSource("i1", ds); - - DAG dag = DAG.create("testDag"); + + DAG dag = DAG.create("DAG-testInferredFilesFail"); dag.addVertex(v1); dag.addTaskLocalFiles(lrs); try { @@ -1024,10 +1024,10 @@ public void testInferredFilesFail() { Assert.assertTrue(e.getMessage().contains("Duplicate Resources found with different size")); } } - + @Test(timeout = 5000) public void testDAGAccessControls() { - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testDAGAccessControls"); ProcessorDescriptor pd1 = ProcessorDescriptor.create("processor1") .setUserPayload(UserPayload.create(ByteBuffer.wrap("processor1Bytes".getBytes()))); Vertex v1 = Vertex.create("v1", pd1, 10, Resource.newInstance(1024, 1)); @@ 
-1055,7 +1055,7 @@ public void testDAGAccessControls() { // v1 has input initializer @Test(timeout = 5000) public void testDAGInvalidParallelism1() { - DAG dag = DAG.create("testDAG"); + DAG dag = DAG.create("DAG-testDAGInvalidParallelism1"); Vertex v1 = Vertex.create("v1", ProcessorDescriptor.create(dummyProcessorClassName)); dag.addVertex(v1); try { @@ -1076,7 +1076,7 @@ public void testDAGInvalidParallelism1() { // v1 has custom vertex manager @Test(timeout = 5000) public void testDAGInvalidParallelism2() { - DAG dag = DAG.create("testDAG"); + DAG dag = DAG.create("DAG-testDAGInvalidParallelism2"); Vertex v1 = Vertex.create("v1", ProcessorDescriptor.create(dummyProcessorClassName)); dag.addVertex(v1); try { @@ -1095,7 +1095,7 @@ public void testDAGInvalidParallelism2() { // v1 has 1-1 united source vertex v0 which has input initializer @Test(timeout = 5000) public void testDAGInvalidParallelism3() { - DAG dag = DAG.create("testDAG"); + DAG dag = DAG.create("DAG-testDAGInvalidParallelism3"); Vertex v1 = Vertex.create("v1", ProcessorDescriptor.create(dummyProcessorClassName)); dag.addVertex(v1); try { @@ -1122,7 +1122,7 @@ public void testDAGInvalidParallelism3() { // v1 has an 1-1 united parent v0 which has custom vertex manager @Test//(timeout = 5000) public void testDAGInvalidParallelism4() { - DAG dag = DAG.create("testDAG"); + DAG dag = DAG.create("DAG-testDAGInvalidParallelism4"); Vertex v1 = Vertex.create("v1", ProcessorDescriptor.create(dummyProcessorClassName)); dag.addVertex(v1); try { @@ -1146,7 +1146,7 @@ public void testDAGInvalidParallelism4() { @Test public void testDAGWithSplitsOnClient() { - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-testDAGWithSplitsOnClient"); // Mimic map which has a data source and shards set when splits are generated in the client Vertex v1 = Vertex.create("v1", ProcessorDescriptor.create(dummyProcessorClassName)); @@ -1170,7 +1170,7 @@ public void testDAGWithSplitsOnClient() { // Verifies failure in case of a file size difference. Does not verify sha differences. 
@Test(timeout = 5000) public void testDAGWithConflictingResource() { - DAG dag = DAG.create("dag"); + DAG dag = DAG.create("DAG-testDAGWithConflictingResource"); Map localResourceMap = new HashMap<>(); String commonResourceKey = "local resource"; localResourceMap.put("lr", LocalResource.newInstance(null, LocalResourceType.FILE, diff --git a/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java b/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java index 265fce9d43..edb7fd8445 100644 --- a/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java +++ b/tez-api/src/test/java/org/apache/tez/dag/api/TestDagTypeConverters.java @@ -34,8 +34,10 @@ import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.security.DAGAccessControls; import org.apache.tez.dag.api.Vertex.VertexExecutionContext; +import org.apache.tez.dag.api.client.StatusGetOpts; import org.apache.tez.dag.api.records.DAGProtos.ACLInfo; import org.apache.tez.dag.api.records.DAGProtos.AMPluginDescriptorProto; +import org.apache.tez.dag.api.records.DAGProtos.StatusGetOptsProto; import org.apache.tez.dag.api.records.DAGProtos.TezEntityDescriptorProto; import org.apache.tez.dag.api.records.DAGProtos.TezNamedEntityDescriptorProto; import org.apache.tez.dag.api.records.DAGProtos.VertexExecutionContextProto; @@ -235,6 +237,23 @@ public void testAclConversions() { assertSame(DagTypeConverters.convertDAGAccessControlsFromProto(aclInfo), aclInfo); } + /* + * This unit test can catch if a StatusGetOpts <-> StatusGetOptsProto value is not defined at any + * side. + */ + @Test + public void testConvertStatusGetOptsToProtoCoverage() { + StatusGetOpts[] opts = StatusGetOpts.values(); + for (StatusGetOpts opt : opts) { + DagTypeConverters.convertStatusGetOptsToProto(opt); + } + + StatusGetOptsProto[] optProtos = StatusGetOptsProto.values(); + for (StatusGetOptsProto proto : optProtos) { + DagTypeConverters.convertStatusGetOptsFromProto(proto); + } + } + private void assertSame(DAGAccessControls dagAccessControls, ACLInfo aclInfo) { assertEquals(dagAccessControls.getUsersWithViewACLs(), Sets.newHashSet(aclInfo.getUsersWithViewAccessList())); diff --git a/tez-api/src/test/java/org/apache/tez/dag/api/TestEntityDescriptor.java b/tez-api/src/test/java/org/apache/tez/dag/api/TestEntityDescriptor.java index 1e8a99db99..82c22ad4ef 100644 --- a/tez-api/src/test/java/org/apache/tez/dag/api/TestEntityDescriptor.java +++ b/tez-api/src/test/java/org/apache/tez/dag/api/TestEntityDescriptor.java @@ -26,38 +26,76 @@ import org.apache.commons.lang.RandomStringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.DataOutputBuffer; import org.apache.tez.common.TezUtils; import org.junit.Assert; import org.junit.Test; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; + public class TestEntityDescriptor { - @Test + public void verifyResults(InputDescriptor entityDescriptor, InputDescriptor deserialized, UserPayload payload, + String confVal) throws IOException { + Assert.assertEquals(entityDescriptor.getClassName(), deserialized.getClassName()); + // History text is not serialized when sending to tasks + Assert.assertNull(deserialized.getHistoryText()); + Assert.assertArrayEquals(payload.deepCopyAsArray(), deserialized.getUserPayload().deepCopyAsArray()); + Configuration deserializedConf = TezUtils.createConfFromUserPayload(deserialized.getUserPayload()); + 
Assert.assertEquals(confVal, deserializedConf.get("testKey")); + } + + public void testSingularWrite(InputDescriptor entityDescriptor, InputDescriptor deserialized, UserPayload payload, + String confVal) throws IOException { + DataOutputBuffer out = new DataOutputBuffer(); + entityDescriptor.write(out); + out.close(); + ByteArrayOutputStream bos = new ByteArrayOutputStream(out.getData().length); + bos.write(out.getData()); + + verify(entityDescriptor).writeSingular(eq(out), any()); + deserialized.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray()))); + verifyResults(entityDescriptor, deserialized, payload, confVal); + } + + public void testSegmentedWrite(InputDescriptor entityDescriptor, InputDescriptor deserialized, UserPayload payload, + String confVal) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(bos); + entityDescriptor.write(out); + out.close(); + + verify(entityDescriptor).writeSegmented(eq(out), any()); + deserialized.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray()))); + verifyResults(entityDescriptor, deserialized, payload, confVal); + } + + @Test (timeout=3000) public void testEntityDescriptorHadoopSerialization() throws IOException { - // This tests the alternate serialization code path - // if the DataOutput is not DataOutputBuffer + /* This tests the alternate serialization code path + * if the DataOutput is not DataOutputBuffer + * AND, if it indeed is, with a read/write payload */ Configuration conf = new Configuration(true); String confVal = RandomStringUtils.random(10000, true, true); conf.set("testKey", confVal); UserPayload payload = TezUtils.createUserPayloadFromConf(conf); + + InputDescriptor deserialized = InputDescriptor.create("dummy"); InputDescriptor entityDescriptor = InputDescriptor.create("inputClazz").setUserPayload(payload) - .setHistoryText("Bar123"); + .setHistoryText("Bar123"); + InputDescriptor entityDescriptorLivingInFear = spy(entityDescriptor); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - DataOutputStream out = new DataOutputStream(bos); - entityDescriptor.write(out); - out.close(); + testSingularWrite(entityDescriptorLivingInFear, deserialized, payload, confVal); - InputDescriptor deserialized = InputDescriptor.create("dummy"); - deserialized.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray()))); - - Assert.assertEquals(entityDescriptor.getClassName(), deserialized.getClassName()); - // History text is not serialized when sending to tasks - Assert.assertNull(deserialized.getHistoryText()); - Assert.assertArrayEquals(payload.deepCopyAsArray(), deserialized.getUserPayload().deepCopyAsArray()); - Configuration deserializedConf = TezUtils.createConfFromUserPayload(deserialized.getUserPayload()); - Assert.assertEquals(confVal, deserializedConf.get("testKey")); + /* make read-only payload */ + payload = UserPayload.create(payload.getPayload()); + entityDescriptor = InputDescriptor.create("inputClazz").setUserPayload(payload) + .setHistoryText("Bar123"); + entityDescriptorLivingInFear = spy(entityDescriptor); + testSegmentedWrite(entityDescriptorLivingInFear, deserialized, payload, confVal); } - } diff --git a/tez-api/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClient.java b/tez-api/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClient.java index 70ee1d495d..8d52aaf3b9 100644 --- a/tez-api/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClient.java +++ 
b/tez-api/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClient.java @@ -20,32 +20,43 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.*; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.argThat; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.isNull; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import javax.annotation.Nullable; + +import java.io.File; import java.io.IOException; +import java.lang.reflect.Field; import java.util.EnumSet; import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.ssl.KeyStoreTestUtil; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.tez.client.FrameworkClient; +import org.apache.tez.common.CachedEntity; +import org.apache.tez.dag.api.NoCurrentDAGException; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.api.client.DAGClient; import org.apache.tez.dag.api.client.DAGClientImpl; import org.apache.tez.dag.api.client.DAGClientTimelineImpl; import org.apache.tez.dag.api.client.DAGStatus; +import org.apache.tez.dag.api.client.DAGStatus.State; import org.apache.tez.dag.api.client.DagStatusSource; import org.apache.tez.dag.api.client.StatusGetOpts; import org.apache.tez.dag.api.client.VertexStatus; +import org.apache.tez.dag.api.client.TimelineReaderFactory.TimelineReaderStrategy; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetDAGStatusRequestProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetDAGStatusResponseProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetVertexStatusRequestProto; @@ -62,6 +73,7 @@ import org.apache.tez.dag.api.records.DAGProtos.TezCountersProto; import org.apache.tez.dag.api.records.DAGProtos.VertexStatusProto; import org.apache.tez.dag.api.records.DAGProtos.VertexStatusStateProto; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentCaptor; @@ -146,6 +158,7 @@ private void setUpData(){ .build(); vertexStatusProtoWithoutCounters = VertexStatusProto.newBuilder() + .setId("vertex_1") .addDiagnostics("V_Diagnostics_0") .setProgress(vertexProgressProto) .setState(VertexStatusStateProto.VERTEX_SUCCEEDED) // make sure the waitForCompletion be able to finish @@ -154,30 +167,22 @@ private void setUpData(){ .setVertexCounters(vertexCountersProto) .build(); } - - private static class DAGCounterRequestMatcher extends ArgumentMatcher{ + + private static class DAGCounterRequestMatcher implements ArgumentMatcher{ @Override - public boolean matches(Object argument) { - if (argument instanceof GetDAGStatusRequestProto){ - GetDAGStatusRequestProto requestProto = (GetDAGStatusRequestProto)argument; - return requestProto.getStatusOptionsCount() != 0 - && requestProto.getStatusOptionsList().get(0) == StatusGetOptsProto.GET_COUNTERS; - } - return false; + public boolean matches(GetDAGStatusRequestProto requestProto) { + return requestProto != null && requestProto.getStatusOptionsCount() != 0 + && 
requestProto.getStatusOptionsList().get(0) == StatusGetOptsProto.GET_COUNTERS; } } - - private static class VertexCounterRequestMatcher extends ArgumentMatcher{ + + private static class VertexCounterRequestMatcher implements ArgumentMatcher{ @Override - public boolean matches(Object argument) { - if (argument instanceof GetVertexStatusRequestProto){ - GetVertexStatusRequestProto requestProto = (GetVertexStatusRequestProto)argument; - return requestProto.getStatusOptionsCount() != 0 - && requestProto.getStatusOptionsList().get(0) == StatusGetOptsProto.GET_COUNTERS; - } - return false; + public boolean matches(GetVertexStatusRequestProto requestProto) { + return requestProto != null && requestProto.getStatusOptionsCount() != 0 + && requestProto.getStatusOptionsList().get(0) == StatusGetOptsProto.GET_COUNTERS; } } @@ -192,19 +197,19 @@ public void setUp() throws YarnException, IOException, TezException, ServiceExce dagIdStr = "dag_9999_0001_1"; mockProxy = mock(DAGClientAMProtocolBlockingPB.class); // return the response with Counters is the request match the CounterMatcher - when(mockProxy.getDAGStatus(isNull(RpcController.class), any(GetDAGStatusRequestProto.class))) + when(mockProxy.getDAGStatus(isNull(), any())) .thenReturn(GetDAGStatusResponseProto.newBuilder().setDagStatus(dagStatusProtoWithoutCounters).build()); - when(mockProxy.getDAGStatus(isNull(RpcController.class), argThat(new DAGCounterRequestMatcher()))) + when(mockProxy.getDAGStatus(isNull(), argThat(new DAGCounterRequestMatcher()))) .thenReturn(GetDAGStatusResponseProto.newBuilder().setDagStatus(dagStatusProtoWithCounters).build()); - when(mockProxy.getVertexStatus(isNull(RpcController.class), any(GetVertexStatusRequestProto.class))) + when(mockProxy.getVertexStatus(isNull(), any())) .thenReturn(GetVertexStatusResponseProto.newBuilder().setVertexStatus(vertexStatusProtoWithoutCounters).build()); - when(mockProxy.getVertexStatus(isNull(RpcController.class), argThat(new VertexCounterRequestMatcher()))) + when(mockProxy.getVertexStatus(isNull(), argThat(new VertexCounterRequestMatcher()))) .thenReturn(GetVertexStatusResponseProto.newBuilder().setVertexStatus(vertexStatusProtoWithCounters).build()); TezConfiguration tezConf = new TezConfiguration(); - YarnConfiguration yarnConf = new YarnConfiguration(tezConf); - dagClient = new DAGClientImpl(mockAppId, dagIdStr, tezConf, yarnConf, null); + dagClient = new DAGClientImpl(mockAppId, dagIdStr, tezConf, null, + UserGroupInformation.getCurrentUser()); DAGClientRPCImpl realClient = (DAGClientRPCImpl)((DAGClientImpl)dagClient).getRealClient(); realClient.appReport = mockAppReport; realClient.proxy = mockProxy; @@ -260,7 +265,7 @@ public void testTryKillDAG() throws Exception{ @Test(timeout = 5000) public void testWaitForCompletion() throws Exception{ // first time return DAG_RUNNING, second time return DAG_SUCCEEDED - when(mockProxy.getDAGStatus(isNull(RpcController.class), any(GetDAGStatusRequestProto.class))) + when(mockProxy.getDAGStatus(isNull(), any())) .thenReturn(GetDAGStatusResponseProto.newBuilder().setDagStatus(dagStatusProtoWithoutCounters) .build()) .thenReturn(GetDAGStatusResponseProto.newBuilder().setDagStatus @@ -281,7 +286,7 @@ public void testWaitForCompletion() throws Exception{ public void testWaitForCompletionWithStatusUpdates() throws Exception{ // first time and second time return DAG_RUNNING, third time return DAG_SUCCEEDED - when(mockProxy.getDAGStatus(isNull(RpcController.class), any(GetDAGStatusRequestProto.class))) + when(mockProxy.getDAGStatus(isNull(), 
any())) .thenReturn(GetDAGStatusResponseProto.newBuilder().setDagStatus( DAGStatusProto.newBuilder(dagStatusProtoWithCounters) .setState(DAGStatusStateProto.DAG_RUNNING).build()).build()) @@ -307,7 +312,7 @@ public void testWaitForCompletionWithStatusUpdates() throws Exception{ verify(mockProxy, times(4)) .getDAGStatus(rpcControllerArgumentCaptor.capture(), argumentCaptor.capture()); - when(mockProxy.getDAGStatus(isNull(RpcController.class), any(GetDAGStatusRequestProto.class))) + when(mockProxy.getDAGStatus(isNull(), any())) .thenReturn(GetDAGStatusResponseProto.newBuilder().setDagStatus( DAGStatusProto.newBuilder(dagStatusProtoWithCounters) .setState(DAGStatusStateProto.DAG_RUNNING).build()).build()) @@ -340,17 +345,14 @@ public void testGetDagStatusWithTimeout() throws Exception { TezConfiguration tezConf = new TezConfiguration(); tezConf.setLong(TezConfiguration.TEZ_DAG_STATUS_POLLINTERVAL_MS, 800l); - YarnConfiguration yarnConf = new YarnConfiguration(tezConf); - DAGClientImplForTest dagClient = new DAGClientImplForTest(mockAppId, dagIdStr, tezConf, - yarnConf,null); + DAGClientImplForTest dagClient = new DAGClientImplForTest(mockAppId, dagIdStr, tezConf, null); DAGClientRPCImplForTest dagClientRpc = new DAGClientRPCImplForTest(mockAppId, dagIdStr, tezConf, null); dagClient.setRealClient(dagClientRpc); DAGStatus dagStatus; - // Fetch from RM. AM not up yet. dagClientRpc.setAMProxy(null); DAGStatus rmDagStatus = @@ -370,7 +372,7 @@ public void testGetDagStatusWithTimeout() throws Exception { // Fetch from AM. RUNNING dagClient.resetCounters(); - dagClientRpc.resetCountesr(); + dagClientRpc.resetCounters(); rmDagStatus = new DAGStatus(constructDagStatusProto(DAGStatusStateProto.DAG_RUNNING), DagStatusSource.RM); dagClient.setRmDagStatus(rmDagStatus); @@ -390,7 +392,7 @@ public void testGetDagStatusWithTimeout() throws Exception { // Fetch from AM. Success. 
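
The matcher rewrites above follow the Mockito 1 to Mockito 2 migration pattern that recurs throughout this patch: org.mockito.Matchers.any gives way to org.mockito.Mockito.any, and ArgumentMatcher turns from an abstract class taking Object into a generic interface whose matches() receives the typed argument, which removes the instanceof/cast boilerplate but keeps an explicit null guard. A self-contained sketch of the pattern, using made-up FooProto/FooService stand-ins rather than the Tez protos:

    import static org.mockito.Mockito.any;
    import static org.mockito.Mockito.argThat;
    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.when;

    import org.mockito.ArgumentMatcher;

    class FooProto {
      boolean wantsCounters;
    }

    interface FooService {
      String getStatus(FooProto request);
    }

    // Mockito 2: ArgumentMatcher is a generic interface and matches() is typed,
    // so the old instanceof + cast body disappears; the null check stays,
    // mirroring the patch, since the matcher can still see a null argument.
    class WantsCountersMatcher implements ArgumentMatcher<FooProto> {
      @Override
      public boolean matches(FooProto request) {
        return request != null && request.wantsCounters;
      }
    }

    class MigrationSketch {
      static FooService mockedService() {
        FooService service = mock(FooService.class);
        // untyped any() replaces the old any(FooProto.class)
        when(service.getStatus(any())).thenReturn("plain");
        // the later, more specific stubbing wins for counter requests
        when(service.getStatus(argThat(new WantsCountersMatcher()))).thenReturn("with counters");
        return service;
      }
    }
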
dagClient.resetCounters(); - dagClientRpc.resetCountesr(); + dagClientRpc.resetCounters(); rmDagStatus = new DAGStatus(constructDagStatusProto(DAGStatusStateProto.DAG_RUNNING), DagStatusSource.RM); dagClient.setRmDagStatus(rmDagStatus); @@ -410,7 +412,7 @@ public void testGetDagStatusWithTimeout() throws Exception { } @Test(timeout = 5000) - public void testDagClientTimelineEnabledCondition() { + public void testDagClientTimelineEnabledCondition() throws IOException { String historyLoggingClass = "org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService"; testAtsEnabled(mockAppId, dagIdStr, false, "", true, true); @@ -422,34 +424,33 @@ public void testDagClientTimelineEnabledCondition() { private static void testAtsEnabled(ApplicationId appId, String dagIdStr, boolean expected, String loggingClass, boolean amHistoryLoggingEnabled, - boolean dagHistoryLoggingEnabled) { + boolean dagHistoryLoggingEnabled) throws IOException { TezConfiguration tezConf = new TezConfiguration(); - YarnConfiguration yarnConf = new YarnConfiguration(tezConf); tezConf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS, loggingClass); tezConf.setBoolean(TezConfiguration.TEZ_AM_HISTORY_LOGGING_ENABLED, amHistoryLoggingEnabled); tezConf.setBoolean(TezConfiguration.TEZ_DAG_HISTORY_LOGGING_ENABLED, dagHistoryLoggingEnabled); - DAGClientImplForTest dagClient = new DAGClientImplForTest(appId, dagIdStr, tezConf, - yarnConf,null); + DAGClientImplForTest dagClient = new DAGClientImplForTest(appId, dagIdStr, tezConf, null); assertEquals(expected, dagClient.getIsATSEnabled()); } private static class DAGClientRPCImplForTest extends DAGClientRPCImpl { - + private AtomicReference faultAMInjectedRef; int numGetStatusViaAmInvocations = 0; public DAGClientRPCImplForTest(ApplicationId appId, String dagId, TezConfiguration conf, - @Nullable FrameworkClient frameworkClient) { - super(appId, dagId, conf, frameworkClient); + @Nullable FrameworkClient frameworkClient) throws IOException { + super(appId, dagId, conf, frameworkClient, UserGroupInformation.getCurrentUser()); + faultAMInjectedRef = new AtomicReference<>(null); } void setAMProxy(DAGClientAMProtocolBlockingPB proxy) { this.proxy = proxy; } - void resetCountesr() { + void resetCounters() { numGetStatusViaAmInvocations = 0; } @@ -466,19 +467,25 @@ boolean createAMProxyIfNeeded() throws IOException, TezException { DAGStatus getDAGStatusViaAM(Set statusOptions, long timeout) throws IOException, TezException { numGetStatusViaAmInvocations++; + if (faultAMInjectedRef.get() != null) { + throw faultAMInjectedRef.get(); + } return super.getDAGStatusViaAM(statusOptions, timeout); } + + void injectAMFault(TezException exception) { + faultAMInjectedRef.set(exception); + } } private static class DAGClientImplForTest extends DAGClientImpl { private DAGStatus rmDagStatus; int numGetStatusViaRmInvocations = 0; - + private volatile boolean faultInjected; public DAGClientImplForTest(ApplicationId appId, String dagId, TezConfiguration conf, - YarnConfiguration yarnConf, - @Nullable FrameworkClient frameworkClient) { - super(appId, dagId, conf, yarnConf, frameworkClient); + @Nullable FrameworkClient frameworkClient) throws IOException { + super(appId, dagId, conf, frameworkClient, UserGroupInformation.getCurrentUser()); } private void setRealClient(DAGClientRPCImplForTest dagClientRpcImplForTest) { @@ -496,6 +503,9 @@ void resetCounters() { @Override protected DAGStatus getDAGStatusViaRM() throws TezException, IOException { numGetStatusViaRmInvocations++; + if (faultInjected) { + throw 
new IOException("Fault Injected for RM"); + } return rmDagStatus; } @@ -503,6 +513,18 @@ public boolean getIsATSEnabled() { return isATSEnabled; } + void injectFault() { + faultInjected = true; + } + + DAGStatus getCachedDAGStatus() { + CachedEntity cacheRef = getCachedDAGStatusRef(); + return cacheRef.getValue(); + } + + void enforceExpirationCachedDAGStatus() { + getCachedDAGStatusRef().enforceExpiration(); + } } private DAGProtos.DAGStatusProto.Builder constructDagStatusProto(DAGStatusStateProto stateProto) { @@ -516,7 +538,7 @@ private DAGClientAMProtocolBlockingPB createMockProxy(final DAGStatusStateProto ServiceException { DAGClientAMProtocolBlockingPB mock = mock(DAGClientAMProtocolBlockingPB.class); - doAnswer(new Answer() { + doAnswer(new Answer() { @Override public Object answer(InvocationOnMock invocation) throws Throwable { GetDAGStatusRequestProto request = (GetDAGStatusRequestProto) invocation.getArguments()[1]; @@ -528,7 +550,185 @@ public Object answer(InvocationOnMock invocation) throws Throwable { return GetDAGStatusResponseProto.newBuilder().setDagStatus(constructDagStatusProto( stateProto)).build(); } - }).when(mock).getDAGStatus(isNull(RpcController.class), any(GetDAGStatusRequestProto.class)); + }).when(mock).getDAGStatus(isNull(), any()); return mock; } + + @Test + /* testing idea is borrowed from YARN-5309 */ + public void testTimelineClientCleanup() throws Exception { + TezConfiguration tezConf = new TezConfiguration(); + tezConf.set("yarn.http.policy", "HTTPS_ONLY"); + + File testDir = new File(System.getProperty("java.io.tmpdir")); + String sslConfDir = KeyStoreTestUtil.getClasspathDir(TestDAGClient.class); + KeyStoreTestUtil.setupSSLConfig(testDir.getAbsolutePath(), sslConfDir, tezConf, false); + + DAGClientTimelineImpl dagClient = + new DAGClientTimelineImpl(mockAppId, dagIdStr, tezConf, mock(FrameworkClient.class), 10000); + Field field = DAGClientTimelineImpl.class.getDeclaredField("timelineReaderStrategy"); + field.setAccessible(true); + TimelineReaderStrategy strategy = (TimelineReaderStrategy) field.get(dagClient); + strategy.getHttpClient(); // calls SSLFactory.init + + ThreadGroup threadGroup = Thread.currentThread().getThreadGroup(); + + while (threadGroup.getParent() != null) { + threadGroup = threadGroup.getParent(); + } + + Thread[] threads = new Thread[threadGroup.activeCount()]; + + threadGroup.enumerate(threads); + Thread reloaderThread = null; + for (Thread thread : threads) { + /* Since HADOOP-16524, the reloader thread's name is changed, let's handle the backward compatibility + * with a simple OR, as this is just a unit test, it's not worth involving a hadoop version check. 
+ */ + if ((thread.getName() != null) && (thread.getName().contains("Truststore reloader thread")) + || (thread.getName().contains("SSL Certificates Store Monitor"))) { + reloaderThread = thread; + } + } + Assert.assertTrue("Reloader is not alive", reloaderThread.isAlive()); + + dagClient.close(); + boolean reloaderStillAlive = true; + for (int i = 0; i < 10; i++) { + reloaderStillAlive = reloaderThread.isAlive(); + if (!reloaderStillAlive) { + break; + } + Thread.sleep(1000); + } + Assert.assertFalse("Reloader is still alive", reloaderStillAlive); + } + + @Test(timeout = 50000) + public void testGetDagStatusWithCachedStatusExpiration() throws Exception { + long startTime; + long endTime; + long diff; + + TezConfiguration tezConf = new TezConfiguration(); + tezConf.setLong(TezConfiguration.TEZ_DAG_STATUS_POLLINTERVAL_MS, 800L); + tezConf.setLong(TezConfiguration.TEZ_CLIENT_DAG_STATUS_CACHE_TIMEOUT_SECS, 100000L); + try (DAGClientImplForTest dagClientImpl = + new DAGClientImplForTest(mockAppId, dagIdStr, tezConf, null)) { + DAGClientRPCImplForTest dagClientRpc = + new DAGClientRPCImplForTest(mockAppId, dagIdStr, tezConf, null); + dagClientImpl.setRealClient(dagClientRpc); + + DAGStatus dagStatus; + DAGStatus rmDagStatus; + + // Fetch from AM. RUNNING + rmDagStatus = + new DAGStatus(constructDagStatusProto(DAGStatusStateProto.DAG_RUNNING), + DagStatusSource.RM); + dagClientImpl.setRmDagStatus(rmDagStatus); + dagClientRpc.setAMProxy(createMockProxy(DAGStatusStateProto.DAG_RUNNING, -1)); + + startTime = System.currentTimeMillis(); + dagStatus = dagClientImpl.getDAGStatus(EnumSet.noneOf(StatusGetOpts.class), 2000L); + endTime = System.currentTimeMillis(); + diff = endTime - startTime; + assertTrue(diff > 1500L && diff < 2500L); + // Directly from AM + assertEquals(0, dagClientImpl.numGetStatusViaRmInvocations); + // Directly from AM - one refresh. One with timeout. + assertEquals(2, dagClientRpc.numGetStatusViaAmInvocations); + assertEquals(DAGStatus.State.RUNNING, dagStatus.getState()); + + // Fetch from AM. Success. + dagClientImpl.resetCounters(); + dagClientRpc.resetCounters(); + rmDagStatus = + new DAGStatus(constructDagStatusProto(DAGStatusStateProto.DAG_RUNNING), + DagStatusSource.RM); + dagClientImpl.setRmDagStatus(rmDagStatus); + dagClientRpc.setAMProxy(createMockProxy(DAGStatusStateProto.DAG_SUCCEEDED, 1000L)); + + startTime = System.currentTimeMillis(); + dagStatus = dagClientImpl.getDAGStatus(EnumSet.noneOf(StatusGetOpts.class), 2000L); + endTime = System.currentTimeMillis(); + diff = endTime - startTime; + assertTrue("diff is " + diff, diff > 500L && diff < 1500L); + // Directly from AM + assertEquals(0, dagClientImpl.numGetStatusViaRmInvocations); + // Directly from AM - previous request cached, so single invocation only. 
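
The assertions around this point rely on DAGClientImpl holding the last DAGStatus in a time-bounded cache and contacting the AM only on a miss. Below is a minimal sketch of such an expiring holder; it is an assumption about the shape, not the real tez-api CachedEntity, and only getValue()/setValue()/enforceExpiration() mirror the hooks the test uses:

    import java.util.concurrent.atomic.AtomicLong;
    import java.util.concurrent.atomic.AtomicReference;

    final class ExpiringValue<T> {
      private final long ttlMillis;
      private final AtomicReference<T> ref = new AtomicReference<>();
      private final AtomicLong writtenAtMillis = new AtomicLong(0); // 0 == never written

      ExpiringValue(long ttlMillis) {
        this.ttlMillis = ttlMillis;
      }

      void setValue(T value) {
        ref.set(value);
        writtenAtMillis.set(System.currentTimeMillis());
      }

      // Null signals "expired or never set": the caller (DAGClientImpl in the
      // tests above) then falls back to a live AM fetch, or the RM if that fails.
      T getValue() {
        if (System.currentTimeMillis() - writtenAtMillis.get() > ttlMillis) {
          return null;
        }
        return ref.get();
      }

      // Counterpart of the enforceExpiration() test hook: forces the next
      // getValue() to miss.
      void enforceExpiration() {
        writtenAtMillis.set(0);
      }
    }
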
+ assertEquals(1, dagClientRpc.numGetStatusViaAmInvocations); + assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState()); + + // verify that the cachedDAGStatus is correct + DAGStatus cachedDagStatus = dagClientImpl.getCachedDAGStatus(); + Assert.assertNotNull(cachedDagStatus); + Assert.assertSame(dagStatus, cachedDagStatus); + + // When AM proxy throws an exception, the cachedDAGStatus should be returned + dagClientImpl.resetCounters(); + dagClientRpc.resetCounters(); + dagClientRpc.injectAMFault(new TezException("injected Fault")); + dagStatus = dagClientImpl.getDAGStatus(EnumSet.noneOf(StatusGetOpts.class)); + // get the Status from the cache + assertEquals(0, dagClientImpl.numGetStatusViaRmInvocations); + // Directly from AM - previous request cached, so single invocation only. + assertEquals(1, dagClientRpc.numGetStatusViaAmInvocations); + assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState()); + Assert.assertSame(dagStatus, cachedDagStatus); + + // test that RM is invoked when the cacheExpires and the AM fails. + dagClientRpc.setAMProxy(createMockProxy(DAGStatusStateProto.DAG_SUCCEEDED, 1000L)); + dagClientRpc.injectAMFault(new TezException("injected AM Fault")); + dagClientImpl.resetCounters(); + dagClientRpc.resetCounters(); + dagClientImpl.enforceExpirationCachedDAGStatus(); + dagStatus = dagClientImpl.getDAGStatus(EnumSet.noneOf(StatusGetOpts.class)); + // get the Status from the cache + assertEquals(1, dagClientImpl.numGetStatusViaRmInvocations); + assertEquals(1, dagClientRpc.numGetStatusViaAmInvocations); + assertEquals(State.RUNNING, dagStatus.getState()); + Assert.assertNotSame(dagStatus, cachedDagStatus); + + // verify that the cachedDAGStatus is null because AM threw exception before setting the + // cache. + cachedDagStatus = dagClientImpl.getCachedDAGStatus(); + Assert.assertNull(cachedDagStatus); + Assert.assertNotNull(dagStatus); + + // inject fault in RM too. 
getDAGStatus should return null; + dagClientImpl.resetCounters(); + dagClientRpc.resetCounters(); + dagClientRpc.setAMProxy(createMockProxy(DAGStatusStateProto.DAG_SUCCEEDED, 1000L)); + dagClientImpl.injectFault(); + try { + dagClientImpl.getDAGStatus(EnumSet.noneOf(StatusGetOpts.class)); + Assert.fail("The RM should throw IOException"); + } catch (IOException ioException) { + Assert.assertEquals(ioException.getMessage(), "Fault Injected for RM"); + assertEquals(1, dagClientImpl.numGetStatusViaRmInvocations); + assertEquals(1, dagClientRpc.numGetStatusViaAmInvocations); + } + } + } + + @Test + public void testDagClientReturnsFailedDAGOnNoCurrentDAGException() throws Exception { + TezConfiguration tezConf = new TezConfiguration(); + tezConf.setBoolean(TezConfiguration.DAG_RECOVERY_ENABLED, false); + + try (DAGClientImplForTest dagClientImpl = new DAGClientImplForTest(mockAppId, dagIdStr, tezConf, null)) { + + DAGClientRPCImplForTest dagClientRpc = new DAGClientRPCImplForTest(mockAppId, dagIdStr, tezConf, null); + dagClientImpl.setRealClient(dagClientRpc); + + DAGClientAMProtocolBlockingPB mock = mock(DAGClientAMProtocolBlockingPB.class); + dagClientRpc.setAMProxy(mock); + dagClientRpc.injectAMFault(new NoCurrentDAGException("dag_0_0_0")); + + DAGStatus dagStatus = dagClientImpl.getDAGStatus(EnumSet.noneOf(StatusGetOpts.class), 2000L); + assertEquals(DAGStatus.State.FAILED, dagStatus.getState()); + assertEquals(NoCurrentDAGException.MESSAGE_PREFIX, dagStatus.getDiagnostics().get(0)); + } + } } diff --git a/tez-api/src/test/java/org/apache/tez/runtime/api/event/TestCompositeDataMovementEvent.java b/tez-api/src/test/java/org/apache/tez/runtime/api/events/TestCompositeDataMovementEvent.java similarity index 90% rename from tez-api/src/test/java/org/apache/tez/runtime/api/event/TestCompositeDataMovementEvent.java rename to tez-api/src/test/java/org/apache/tez/runtime/api/events/TestCompositeDataMovementEvent.java index 7dce6991a4..98c238a1dd 100644 --- a/tez-api/src/test/java/org/apache/tez/runtime/api/event/TestCompositeDataMovementEvent.java +++ b/tez-api/src/test/java/org/apache/tez/runtime/api/events/TestCompositeDataMovementEvent.java @@ -16,12 +16,10 @@ * limitations under the License. */ -package org.apache.tez.runtime.api.event; +package org.apache.tez.runtime.api.events; import java.nio.ByteBuffer; -import org.apache.tez.runtime.api.events.CompositeDataMovementEvent; -import org.apache.tez.runtime.api.events.DataMovementEvent; import org.junit.Assert; import org.junit.Test; diff --git a/tez-api/src/test/java/org/apache/tez/runtime/api/events/TestInputDataInformationEvent.java b/tez-api/src/test/java/org/apache/tez/runtime/api/events/TestInputDataInformationEvent.java new file mode 100644 index 0000000000..6e002e26c4 --- /dev/null +++ b/tez-api/src/test/java/org/apache/tez/runtime/api/events/TestInputDataInformationEvent.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.runtime.api.events; + +import java.nio.ByteBuffer; + +import org.junit.Assert; +import org.junit.Test; + +import com.google.common.base.Charsets; + +public class TestInputDataInformationEvent { + + @Test + public void testApiPayloadOrPath() { + InputDataInformationEvent eventWithSerializedPayload = + InputDataInformationEvent.createWithSerializedPayload(0, ByteBuffer.wrap("payload1".getBytes())); + // event created by createWithSerializedPayload should contain serialized payload + // but not a path or a deserialized payload + Assert.assertEquals("payload1", Charsets.UTF_8.decode(eventWithSerializedPayload.getUserPayload()).toString()); + Assert.assertNull(eventWithSerializedPayload.getSerializedPath()); + Assert.assertNull(eventWithSerializedPayload.getDeserializedUserPayload()); + + InputDataInformationEvent eventWithObjectPayload = InputDataInformationEvent.createWithObjectPayload(0, "payload2"); + // event created by eventWithObjectPayload should contain a deserialized payload + // but not a path or serialized payload + Assert.assertEquals("payload2", eventWithObjectPayload.getDeserializedUserPayload()); + Assert.assertNull(eventWithObjectPayload.getSerializedPath()); + Assert.assertNull(eventWithObjectPayload.getUserPayload()); + + InputDataInformationEvent eventWithPath = InputDataInformationEvent.createWithSerializedPath(0, "file://hello"); + // event created by createWithSerializedPath should contain a path + // but neither serialized nor deserialized payload + Assert.assertEquals("file://hello", eventWithPath.getSerializedPath()); + Assert.assertNull(eventWithPath.getUserPayload()); + Assert.assertNull(eventWithPath.getDeserializedUserPayload()); + } +} diff --git a/tez-api/src/test/resources/META-INF/LICENSE.txt b/tez-api/src/test/resources/META-INF/LICENSE similarity index 100% rename from tez-api/src/test/resources/META-INF/LICENSE.txt rename to tez-api/src/test/resources/META-INF/LICENSE diff --git a/tez-api/src/test/resources/META-INF/NOTICE.txt b/tez-api/src/test/resources/META-INF/NOTICE similarity index 69% rename from tez-api/src/test/resources/META-INF/NOTICE.txt rename to tez-api/src/test/resources/META-INF/NOTICE index 3f36fcc6ba..2595905699 100644 --- a/tez-api/src/test/resources/META-INF/NOTICE.txt +++ b/tez-api/src/test/resources/META-INF/NOTICE @@ -1,5 +1,5 @@ Apache Tez -Copyright (c) 2016 The Apache Software Foundation +Copyright 2014-2024 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). 
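
testTimelineClientCleanup above, borrowing its idea from YARN-5309, asserts that closing the timeline client also stops the SSL truststore reloader thread, locating that thread by climbing to the root ThreadGroup and matching against the two names the reloader has carried across Hadoop versions. Extracted as a small illustrative helper (ThreadFinder is not part of the patch):

    final class ThreadFinder {
      // Returns the first live thread whose name contains any fragment, or null.
      // Used here with "Truststore reloader thread" and
      // "SSL Certificates Store Monitor" (the pre/post HADOOP-16524 names).
      static Thread findByNameFragment(String... fragments) {
        ThreadGroup group = Thread.currentThread().getThreadGroup();
        while (group.getParent() != null) {
          group = group.getParent(); // climb to the root group to see all threads
        }
        Thread[] threads = new Thread[group.activeCount()];
        group.enumerate(threads); // best effort: the count can race with thread churn
        for (Thread t : threads) {
          if (t == null || t.getName() == null) {
            continue;
          }
          for (String fragment : fragments) {
            if (t.getName().contains(fragment)) {
              return t;
            }
          }
        }
        return null;
      }
    }
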
diff --git a/tez-api/src/test/resources/test1-version-info.properties b/tez-api/src/test/resources/test1-version-info.properties
index ebb4c03ce3..e2563d6cec 100644
--- a/tez-api/src/test/resources/test1-version-info.properties
+++ b/tez-api/src/test/resources/test1-version-info.properties
@@ -19,4 +19,4 @@
 version=0.6.0-SNAPSHOT
 revision=d523db65804a5742ce50824e6fcfb8a04d184c0d
 buildtime=20141024-1052
-scmurl=scm:git:https://git-wip-us.apache.org/repos/asf/tez.git
+scmurl=scm:git:https://gitbox.apache.org/repos/asf/tez.git
diff --git a/tez-api/src/test/resources/test3-version-info.properties b/tez-api/src/test/resources/test3-version-info.properties
index 401f382872..9def3fbf72 100644
--- a/tez-api/src/test/resources/test3-version-info.properties
+++ b/tez-api/src/test/resources/test3-version-info.properties
@@ -19,4 +19,4 @@
 version=0.6.0-SNAPSHOT
 revision=d523db65804a5742ce50824e6fcfb8a04d184c0d
 buildtime=
-scmurl=scm:git:https://git-wip-us.apache.org/repos/asf/tez.git
+scmurl=scm:git:https://gitbox.apache.org/repos/asf/tez.git
diff --git a/tez-build-tools/pom.xml b/tez-build-tools/pom.xml
new file mode 100644
index 0000000000..521d19e6db
--- /dev/null
+++ b/tez-build-tools/pom.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.tez</groupId>
+    <artifactId>tez</artifactId>
+    <version>0.10.5-SNAPSHOT</version>
+  </parent>
+  <artifactId>tez-build-tools</artifactId>
+</project>
diff --git a/tez-build-tools/src/main/resources/checkstyle/checkstyle.xml b/tez-build-tools/src/main/resources/checkstyle/checkstyle.xml
new file mode 100644
index 0000000000..cac5814e7f
--- /dev/null
+++ b/tez-build-tools/src/main/resources/checkstyle/checkstyle.xml
@@ -0,0 +1,201 @@
+[201 lines of checkstyle rule configuration; the XML markup was not preserved in this copy of the patch]
diff --git a/tez-build-tools/src/main/resources/checkstyle/suppressions.xml b/tez-build-tools/src/main/resources/checkstyle/suppressions.xml
new file mode 100644
index 0000000000..829751a139
--- /dev/null
+++ b/tez-build-tools/src/main/resources/checkstyle/suppressions.xml
@@ -0,0 +1,21 @@
+[21 lines of checkstyle suppression configuration; the XML markup was not preserved in this copy of the patch]
diff --git a/tez-common/pom.xml b/tez-common/pom.xml
index 8a23335bea..c7f56392c8 100644
--- a/tez-common/pom.xml
+++ b/tez-common/pom.xml
@@ -20,14 +20,14 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.9.1-SNAPSHOT</version>
+    <version>0.10.5-SNAPSHOT</version>
   </parent>
   <artifactId>tez-common</artifactId>
   <dependencies>
     <dependency>
       <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-log4j12</artifactId>
+      <artifactId>slf4j-reload4j</artifactId>
     </dependency>
     <dependency>
       <groupId>com.google.guava</groupId>
@@ -63,7 +63,7 @@
     <dependency>
       <groupId>org.mockito</groupId>
-      <artifactId>mockito-all</artifactId>
+      <artifactId>mockito-core</artifactId>
       <scope>test</scope>
     </dependency>
diff --git a/tez-common/src/main/java/org/apache/tez/common/AsyncDispatcher.java b/tez-common/src/main/java/org/apache/tez/common/AsyncDispatcher.java
index 3a59ff6449..af4c97a781 100644
--- a/tez-common/src/main/java/org/apache/tez/common/AsyncDispatcher.java
+++ b/tez-common/src/main/java/org/apache/tez/common/AsyncDispatcher.java
@@ -33,10 +33,10 @@
 import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.tez.dag.api.TezConfiguration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
 
 /**
@@ -105,9 +105,12 @@ public void run() {
         Event event;
         try {
           event = eventQueue.take();
+          if (LOG.isTraceEnabled()) {
+            LOG.trace("AsyncDispatcher taken event: {}", event);
+          }
         } catch(InterruptedException ie) {
           if (!stopped) {
-            LOG.warn("AsyncDispatcher thread interrupted", ie);
+
LOG.warn("AsyncDispatcher thread interrupted (while taking event)", ie); } return; } @@ -140,22 +143,39 @@ public void setDrainEventsOnStop() { @Override protected void serviceStop() throws Exception { + LOG.info("AsyncDispatcher serviceStop called, drainEventsOnStop: {}, drained: {}, eventQueue size: {}", + drainEventsOnStop, drained, eventQueue.size()); if (drainEventsOnStop) { blockNewEvents = true; LOG.info("AsyncDispatcher is draining to stop, ignoring any new events."); + long endTime = System.currentTimeMillis() + getConfig() + .getInt(TezConfiguration.TEZ_AM_DISPATCHER_DRAIN_EVENTS_TIMEOUT, + TezConfiguration.TEZ_AM_DISPATCHER_DRAIN_EVENTS_TIMEOUT_DEFAULT); + synchronized (waitForDrained) { - while (!drained && eventHandlingThread.isAlive()) { + while (!eventQueue.isEmpty() && eventHandlingThread.isAlive() && (System.currentTimeMillis() - endTime < 0)) { waitForDrained.wait(1000); - LOG.info("Waiting for AsyncDispatcher to drain."); + LOG.info( + "Waiting for AsyncDispatcher to drain. Current queue size: {}, handler thread state: {}", + eventQueue.size(), eventHandlingThread.getState()); } } - } stopped = true; if (eventHandlingThread != null) { eventHandlingThread.interrupt(); try { - eventHandlingThread.join(); + /* + * The event handling thread can be alive at this point, but in BLOCKED state, which leads + * to app hang, as a BLOCKED thread might never finish under some circumstances + */ + if (eventHandlingThread.getState() == Thread.State.BLOCKED) { + LOG.warn( + "eventHandlingThread is in BLOCKED state, let's not wait for it in order to prevent app hang"); + } else { + eventHandlingThread.join(); + LOG.info("joined event handling thread, state: {}", eventHandlingThread.getState()); + } } catch (InterruptedException ie) { LOG.warn("Interrupted Exception while stopping", ie); } @@ -182,6 +202,10 @@ protected void dispatch(Event event) { throw new Exception("No handler for registered for " + type); } } catch (Throwable t) { + if (t instanceof InterruptedException) { + LOG.warn("Interrupted Exception while handling event: " + event.getType(), t); + Thread.currentThread().interrupt(); + } LOG.error("Error in dispatcher thread", t); // If serviceStop is called, we should exit this thread gracefully. 
if (exitOnDispatchException
@@ -345,9 +369,12 @@ public void handle(Event event) {
     }
     try {
       eventQueue.put(event);
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("AsyncDispatcher put event: {}", event);
+      }
     } catch (InterruptedException e) {
       if (!stopped) {
-        LOG.warn("AsyncDispatcher thread interrupted", e);
+        LOG.warn("AsyncDispatcher thread interrupted (while putting event): {}", event, e);
       }
       throw new YarnRuntimeException(e);
     }
diff --git a/tez-common/src/main/java/org/apache/tez/common/AsyncDispatcherConcurrent.java b/tez-common/src/main/java/org/apache/tez/common/AsyncDispatcherConcurrent.java
index 4a632f5a61..f22159b46b 100644
--- a/tez-common/src/main/java/org/apache/tez/common/AsyncDispatcherConcurrent.java
+++ b/tez-common/src/main/java/org/apache/tez/common/AsyncDispatcherConcurrent.java
@@ -37,7 +37,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
diff --git a/tez-common/src/main/java/org/apache/tez/common/DagContainerLauncher.java b/tez-common/src/main/java/org/apache/tez/common/DagContainerLauncher.java
index e3bd385a1a..c2337af77e 100644
--- a/tez-common/src/main/java/org/apache/tez/common/DagContainerLauncher.java
+++ b/tez-common/src/main/java/org/apache/tez/common/DagContainerLauncher.java
@@ -20,11 +20,16 @@
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.tez.common.security.JobTokenSecretManager;
 import org.apache.tez.dag.records.TezDAGID;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.serviceplugins.api.ContainerLauncher;
 import org.apache.tez.serviceplugins.api.ContainerLauncherContext;
 
+import java.util.Set;
+
 /**
  * Plugin to allow custom container launchers to be written to launch containers that want to
  * support cleanup of DAG level directories upon DAG completion in session mode. The directories are created by
@@ -40,4 +45,10 @@ public DagContainerLauncher(ContainerLauncherContext containerLauncherContext) {
   }
 
   public abstract void dagComplete(TezDAGID dag, JobTokenSecretManager jobTokenSecretManager);
+
+  public abstract void vertexComplete(TezVertexID vertex, JobTokenSecretManager jobTokenSecretManager,
+      Set<NodeId> nodeIdList);
+
+  public abstract void taskAttemptFailed(TezTaskAttemptID taskAttemptID,
+      JobTokenSecretManager jobTokenSecretManager, NodeId nodeId);
 }
diff --git a/tez-common/src/main/java/org/apache/tez/common/EnvironmentUpdateUtils.java b/tez-common/src/main/java/org/apache/tez/common/EnvironmentUpdateUtils.java
deleted file mode 100644
index 0e597b3779..0000000000
--- a/tez-common/src/main/java/org/apache/tez/common/EnvironmentUpdateUtils.java
+++ /dev/null
@@ -1,127 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.
You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tez.common;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.util.Shell;
-
-import java.lang.reflect.Field;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * A utility class which allows one to dynamically update/change Environment variables
- */
-@InterfaceAudience.Private
-public class EnvironmentUpdateUtils {
-
-  /**
-   * Allows dynamic update to the environment variables. After calling put,
-   * System.getenv(key) will then return value.
-   *
-   * @param key System environment variable
-   * @param value Value to assign to system environment variable
-   */
-  public synchronized static void put(String key, String value){
-    Map<String, String> environment = new HashMap<String, String>(System.getenv());
-    environment.put(key, value);
-    if (!Shell.WINDOWS) {
-      updateEnvironment(environment);
-    } else {
-      updateEnvironmentOnWindows(environment);
-    }
-  }
-
-  /**
-   * Allows dynamic update to a collection of environment variables. After
-   * calling putAll, System.getenv(key) will then return value for each entry
-   * in the map
-   *
-   * @param additionalEnvironment Collection where the key is the System
-   * environment variable and the value is the value to assign the system
-   * environment variable
-   */
-  public synchronized static void putAll(Map<String, String> additionalEnvironment) {
-    Map<String, String> environment = new HashMap<String, String>(System.getenv());
-    environment.putAll(additionalEnvironment);
-    if (!Shell.WINDOWS) {
-      updateEnvironment(environment);
-    } else {
-      updateEnvironmentOnWindows(environment);
-    }
-  }
-
-  /**
-   * Finds and modifies internal storage for system environment variables using
-   * reflection
-   *
-   * @param environment Collection where the key is the System
-   * environment variable and the value is the value to assign the system
-   * environment variable
-   */
-  @SuppressWarnings("unchecked")
-  private static void updateEnvironment(Map<String, String> environment) {
-    final Map<String, String> currentEnv = System.getenv();
-    copyMapValuesToPrivateField(currentEnv.getClass(), currentEnv, "m", environment);
-  }
-
-  /**
-   * Finds and modifies internal storage for system environment variables using reflection. This
-   * method works only on windows. Note that the actual env is not modified, rather the copy of env
-   * which the JVM creates at the beginning of execution is.
-   *
-   * @param environment Collection where the key is the System
-   * environment variable and the value is the value to assign the system
-   * environment variable
-   */
-  @SuppressWarnings("unchecked")
-  private static void updateEnvironmentOnWindows(Map<String, String> environment) {
-    try {
-      Class<?> processEnvironmentClass = Class.forName("java.lang.ProcessEnvironment");
-      copyMapValuesToPrivateField(processEnvironmentClass, null, "theEnvironment", environment);
-      copyMapValuesToPrivateField(processEnvironmentClass, null, "theCaseInsensitiveEnvironment",
-          environment);
-    } catch (ClassNotFoundException e) {
-      throw new IllegalStateException("Failed to update Environment variables", e);
-    }
-  }
-
-  /**
-   * Copies the given map values to the field specified by {@code fieldName}
-   * @param klass The {@code Class} of the object
-   * @param object The object to modify or null if the field is static
-   * @param fieldName The name of the field to set
-   * @param newMapValues The values to replace the current map.
-   */
-  @SuppressWarnings("unchecked")
-  private static void copyMapValuesToPrivateField(Class<?> klass, Object object, String fieldName,
-      Map<String, String> newMapValues) {
-    try {
-      Field field = klass.getDeclaredField(fieldName);
-      field.setAccessible(true);
-      Map<String, String> currentMap = (Map<String, String>) field.get(object);
-      currentMap.clear();
-      currentMap.putAll(newMapValues);
-    } catch (NoSuchFieldException e) {
-      throw new IllegalStateException("Failed to update Environment variables", e);
-    } catch (IllegalAccessException e) {
-      throw new IllegalStateException("Failed to update Environment variables", e);
-    }
-  }
-}
diff --git a/tez-common/src/main/java/org/apache/tez/common/Preconditions.java b/tez-common/src/main/java/org/apache/tez/common/Preconditions.java
new file mode 100644
index 0000000000..42d862cd22
--- /dev/null
+++ b/tez-common/src/main/java/org/apache/tez/common/Preconditions.java
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.common;
+
+import javax.annotation.Nullable;
+
+/**
+ * A simplified version of Guava's Preconditions for making it easy to handle its usage in Tez project.
+ */
+public class Preconditions {
+
+  private Preconditions() {
+  }
+
+  public static void checkArgument(boolean expression) {
+    if (!expression) {
+      throw new IllegalArgumentException();
+    }
+  }
+
+  public static void checkArgument(boolean expression, @Nullable Object message) {
+    if (!expression) {
+      throw new IllegalArgumentException(String.valueOf(message));
+    }
+  }
+
+  public static void checkArgument(boolean expression, @Nullable String template,
+      @Nullable Object...
args) { + if (!expression) { + throw new IllegalArgumentException(format(template, args)); + } + } + + public static void checkState(boolean expression) { + if (!expression) { + throw new IllegalStateException(); + } + } + + public static void checkState(boolean expression, @Nullable Object message) { + if (!expression) { + throw new IllegalStateException(String.valueOf(message)); + } + } + + public static void checkState(boolean expression, @Nullable String template, + @Nullable Object... args) { + if (!expression) { + throw new IllegalStateException(format(template, args)); + } + } + + private static String format(@Nullable String template, @Nullable Object... args) { + template = String.valueOf(template); // null -> "null" + + if (args == null) { + args = new Object[] { "(Object[])null" }; + } else { + for (int i = 0; i < args.length; i++) { + args[i] = lenientToString(args[i]); + } + } + + // start substituting the arguments into the '%s' placeholders + StringBuilder builder = new StringBuilder(template.length() + 16 * args.length); + int templateStart = 0; + int i = 0; + while (i < args.length) { + int placeholderStart = template.indexOf("%s", templateStart); + if (placeholderStart == -1) { + break; + } + builder.append(template, templateStart, placeholderStart); + builder.append(args[i++]); + templateStart = placeholderStart + 2; + } + builder.append(template, templateStart, template.length()); + + // if we run out of placeholders, append the extra args in square braces + if (i < args.length) { + builder.append(" ["); + builder.append(args[i++]); + while (i < args.length) { + builder.append(", "); + builder.append(args[i++]); + } + builder.append(']'); + } + + return builder.toString(); + } + + private static String lenientToString(@Nullable Object o) { + try { + return String.valueOf(o); + } catch (Exception e) { + String objectToString = o.getClass().getName() + '@' + Integer.toHexString(System.identityHashCode(o)); + return "<" + objectToString + " threw " + e.getClass().getName() + ">"; + } + } +} diff --git a/tez-common/src/main/java/org/apache/tez/common/StreamHelper.java b/tez-common/src/main/java/org/apache/tez/common/StreamHelper.java new file mode 100644 index 0000000000..789d9b4268 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/common/StreamHelper.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tez.common; + +import org.apache.hadoop.fs.StreamCapabilities; +import org.apache.hadoop.fs.Syncable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +public final class StreamHelper { + + private static final Logger LOG = LoggerFactory.getLogger(StreamHelper.class); + + private StreamHelper() { + } + + public static void hflushIfSupported(Syncable syncable) throws IOException { + if (syncable instanceof StreamCapabilities) { + if (((StreamCapabilities) syncable).hasCapability(StreamCapabilities.HFLUSH)) { + syncable.hflush(); + } else { + // it would be no-op, if hflush is not supported by a given writer. + LOG.debug("skipping hflush, since the writer doesn't support it"); + } + } else { + // this is done for backward compatibility in order to make it work with + // older versions of Hadoop. + syncable.hflush(); + } + } +} diff --git a/tez-common/src/main/java/org/apache/tez/common/TezContainerLogAppender.java b/tez-common/src/main/java/org/apache/tez/common/TezContainerLogAppender.java index 2cfacfb08e..1649a0b318 100644 --- a/tez-common/src/main/java/org/apache/tez/common/TezContainerLogAppender.java +++ b/tez-common/src/main/java/org/apache/tez/common/TezContainerLogAppender.java @@ -19,6 +19,7 @@ package org.apache.tez.common; import java.io.File; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.log4j.FileAppender; @@ -48,14 +49,14 @@ public void activateOptions() { * The file will be created within the container's log directory. * * @param fileName + * @throws NullPointerException if {@code fileName} is {@code null} + * @throws IllegalArgumentException if {@code fileName} is an absolute path */ public void setLogFileName(String fileName) { - if (fileName == null || fileName.contains(File.pathSeparator)) { - throw new RuntimeException( - "Invalid filename specified: " - + fileName - + " . FileName should not have a path component and should not be empty."); - } + Objects.requireNonNull(fileName); + Preconditions.checkArgument(!fileName.contains(File.pathSeparator), + "Invalid filename specified: " + fileName + + " . FileName should not have a path component and should not be empty."); this.logFileName = fileName; } diff --git a/tez-common/src/main/java/org/apache/tez/common/TezSharedExecutor.java b/tez-common/src/main/java/org/apache/tez/common/TezSharedExecutor.java index 3cc72d528f..bf8eb4f861 100644 --- a/tez-common/src/main/java/org/apache/tez/common/TezSharedExecutor.java +++ b/tez-common/src/main/java/org/apache/tez/common/TezSharedExecutor.java @@ -40,7 +40,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.tez.dag.api.TezConfiguration; -import com.google.common.base.Preconditions; import com.google.common.util.concurrent.ThreadFactoryBuilder; /** diff --git a/tez-common/src/main/java/org/apache/tez/common/TezTestUtils.java b/tez-common/src/main/java/org/apache/tez/common/TezTestUtils.java new file mode 100644 index 0000000000..1cbacbd138 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/common/TezTestUtils.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.common; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.conf.YarnConfiguration; + +public final class TezTestUtils { + + private TezTestUtils() {} + + /** + * Ensures a reasonably high limit for yarn disk utilization. This is very important for tests, + * as devs keep bumping into silent test hangs where yarn simply considers their machines as unhealthy, + * as the default limit is 90%, even if a machine with 90% full disk is still able to function. + */ + public static void ensureHighDiskUtilizationLimit(Configuration conf) { + if (conf.getFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE, + YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE) == + YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE) { + conf.setFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE, 99.0f); + } + } +} diff --git a/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java b/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java index 5ba2972ea3..477134e4d2 100644 --- a/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java +++ b/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java @@ -5,9 +5,9 @@ * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the
@@ -21,9 +21,10 @@
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.lang.management.ManagementFactory;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.util.BitSet;
 import java.util.HashSet;
 import java.util.List;
@@ -46,6 +47,7 @@
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.log4j.Appender;
+import org.apache.log4j.PatternLayout;
 import org.apache.tez.common.io.NonSyncByteArrayOutputStream;
 import org.apache.tez.dag.api.DagTypeConverters;
 import org.apache.tez.dag.records.TezDAGID;
@@ -65,26 +67,18 @@
 import org.apache.tez.dag.records.TaskAttemptTerminationCause;
 
 @Private
-public class TezUtilsInternal {
+public final class TezUtilsInternal {
 
   private static final Logger LOG = LoggerFactory.getLogger(TezUtilsInternal.class);
 
+  private TezUtilsInternal() {}
+
   public static ConfigurationProto readUserSpecifiedTezConfiguration(String baseDir) throws
       IOException {
-    FileInputStream confPBBinaryStream = null;
-    ConfigurationProto.Builder confProtoBuilder = ConfigurationProto.newBuilder();
-    try {
-      confPBBinaryStream =
-          new FileInputStream(new File(baseDir, TezConstants.TEZ_PB_BINARY_CONF_NAME));
-      confProtoBuilder.mergeFrom(confPBBinaryStream);
-    } finally {
-      if (confPBBinaryStream != null) {
-        confPBBinaryStream.close();
-      }
+    File confPBFile = new File(baseDir, TezConstants.TEZ_PB_BINARY_CONF_NAME);
+    try (FileInputStream fis = new FileInputStream(confPBFile)) {
+      return ConfigurationProto.parseFrom(fis);
     }
-
-    ConfigurationProto confProto = confProtoBuilder.build();
-    return confProto;
   }
 
   public static void addUserSpecifiedTezConfiguration(Configuration conf,
@@ -95,33 +89,8 @@
       }
     }
   }
-//
-//  public static void addUserSpecifiedTezConfiguration(String baseDir, Configuration conf) throws
-//      IOException {
-//    FileInputStream confPBBinaryStream = null;
-//    ConfigurationProto.Builder confProtoBuilder = ConfigurationProto.newBuilder();
-//    try {
-//      confPBBinaryStream =
-//          new FileInputStream(new File(baseDir, TezConstants.TEZ_PB_BINARY_CONF_NAME));
-//      confProtoBuilder.mergeFrom(confPBBinaryStream);
-//    } finally {
-//      if (confPBBinaryStream != null) {
-//        confPBBinaryStream.close();
-//      }
-//    }
-//
-//    ConfigurationProto confProto = confProtoBuilder.build();
-//
-//    List<PlanKeyValuePair> kvPairList = confProto.getConfKeyValuesList();
-//    if (kvPairList != null && !kvPairList.isEmpty()) {
-//      for (PlanKeyValuePair kvPair : kvPairList) {
-//        conf.set(kvPair.getKey(), kvPair.getValue());
-//      }
-//    }
-//  }
-
-
-  public static byte[] compressBytes(byte[] inBytes) throws IOException {
+
+  public static byte[] compressBytes(byte[] inBytes) {
    StopWatch sw = new StopWatch().start();
    byte[] compressed = compressBytesInflateDeflate(inBytes);
    sw.stop();
@@ -153,8 +122,7 @@
      int count = deflater.deflate(buffer);
      bos.write(buffer, 0, count);
    }
-    byte[] output = bos.toByteArray();
-    return output;
+    return bos.toByteArray();
  }
 
  private static byte[] uncompressBytesInflateDeflate(byte[] inBytes) throws IOException {
@@ -171,8 +139,7 @@
      }
      bos.write(buffer, 0, count);
    }
-    byte[] output = bos.toByteArray();
-    return output;
+    return
bos.toByteArray(); } private static final Pattern pattern = Pattern.compile("\\W"); @@ -187,21 +154,27 @@ public static String cleanVertexName(String vertexName) { private static String sanitizeString(String srcString) { Matcher matcher = pattern.matcher(srcString); - String res = matcher.replaceAll("_"); - return res; // Number starts allowed rightnow + return matcher.replaceAll("_"); // Number starts allowed rightnow } - public static void updateLoggers(String addend) throws FileNotFoundException { + public static void updateLoggers(Configuration configuration, String addend, String patternString) + throws FileNotFoundException { LOG.info("Redirecting log file based on addend: " + addend); - Appender appender = org.apache.log4j.Logger.getRootLogger().getAppender( - TezConstants.TEZ_CONTAINER_LOGGER_NAME); + Appender appender = + org.apache.log4j.Logger.getRootLogger().getAppender(TezConstants.TEZ_CONTAINER_LOGGER_NAME); if (appender != null) { if (appender instanceof TezContainerLogAppender) { TezContainerLogAppender claAppender = (TezContainerLogAppender) appender; claAppender.setLogFileName(constructLogFileName( - TezConstants.TEZ_CONTAINER_LOG_FILE_NAME, addend)); + addend)); + + // there was a configured pattern + if (patternString != null) { + PatternLayout layout = (PatternLayout) claAppender.getLayout(); + layout.setConversionPattern(patternString); + } claAppender.activateOptions(); } else { LOG.warn("Appender is a " + appender.getClass() + "; require an instance of " @@ -213,11 +186,11 @@ public static void updateLoggers(String addend) throws FileNotFoundException { } } - private static String constructLogFileName(String base, String addend) { + private static String constructLogFileName(String addend) { if (addend == null || addend.isEmpty()) { - return base; + return TezConstants.TEZ_CONTAINER_LOG_FILE_NAME; } else { - return base + "_" + addend; + return TezConstants.TEZ_CONTAINER_LOG_FILE_NAME + "_" + addend; } } @@ -238,7 +211,7 @@ public static byte[] toByteArray(BitSet bits) { if (bits == null) { return null; } - byte[] bytes = new byte[bits.length() / 8 + 1]; + byte[] bytes = new byte[(bits.length() + 7) / 8]; for (int i = 0; i < bits.length(); i++) { if (bits.get(i)) { bytes[(bytes.length) - (i / 8) - 1] |= 1 << (i % 8); @@ -248,9 +221,8 @@ public static byte[] toByteArray(BitSet bits) { } /** - * Convert DAGPlan to text. Skip sensitive informations like credentials. + * Convert DAGPlan to text. Skip sensitive information like credentials. 
*
-   * @param dagPlan
    * @return a string representation of the dag plan with sensitive information removed
    */
   public static String convertDagPlanToString(DAGProtos.DAGPlan dagPlan) throws IOException {
@@ -263,7 +235,7 @@
           DagTypeConverters.convertByteStringToCredentials(dagPlan.getCredentialsBinary());
       TextFormat.printField(entry.getKey(),
           ByteString.copyFrom(TezCommonUtils.getCredentialsInfo(credentials,"dag").getBytes(
-              Charset.forName("UTF-8"))), sb);
+              StandardCharsets.UTF_8)), sb);
       }
     }
     return sb.toString();
@@ -291,8 +263,6 @@ public static TaskAttemptTerminationCause fromTaskAttemptEndReason(
       return TaskAttemptTerminationCause.NODE_FAILED;
     case CONTAINER_EXITED:
       return TaskAttemptTerminationCause.CONTAINER_EXITED;
-    case OTHER:
-      return TaskAttemptTerminationCause.UNKNOWN_ERROR;
     default:
       return TaskAttemptTerminationCause.UNKNOWN_ERROR;
   }
@@ -357,6 +327,25 @@ public static <E extends Enum<E>> Set<E> getEnums(Configuration conf, String con
     return enums;
   }
 
+  public static Integer getPid() {
+    String pidStr = null;
+    String name = ManagementFactory.getRuntimeMXBean().getName();
+    if (name != null) {
+      int idx = name.indexOf("@");
+      if (idx != -1) {
+        pidStr = name.substring(0, name.indexOf("@"));
+      }
+    }
+    try {
+      if (pidStr != null) {
+        return Integer.valueOf(pidStr);
+      }
+    } catch (NumberFormatException nfe) {
+      LOG.info("Couldn't parse \"{}\" into integer pid", pidStr);
+    }
+    return null;
+  }
+
   @Private
   public static void setHadoopCallerContext(HadoopShim hadoopShim, TezTaskAttemptID attemptID) {
     hadoopShim.setHadoopCallerContext("tez_ta:" + attemptID.toString());
diff --git a/tez-common/src/main/java/org/apache/tez/common/web/AbstractServletToControllerAdapter.java b/tez-common/src/main/java/org/apache/tez/common/web/AbstractServletToControllerAdapter.java
new file mode 100644
index 0000000000..b79b5d5d9c
--- /dev/null
+++ b/tez-common/src/main/java/org/apache/tez/common/web/AbstractServletToControllerAdapter.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.common.web;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.Enumeration;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import javax.servlet.ServletConfig;
+import javax.servlet.ServletContext;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.hadoop.yarn.webapp.Controller;
+
+/**
+ * AbstractServletToControllerAdapter is a common ancestor for classes
+ * that wish to adapt servlets to yarn webapp controllers.
+ * The adapter is responsible for:
+ * 1.
creating a servlet instance
+ * 2. creating a dummy ServletConfig
+ * 3. delegating calls to the servlet instance's doGet method
+ */
+public abstract class AbstractServletToControllerAdapter extends Controller {
+  private AtomicBoolean initialized = new AtomicBoolean(false);
+  protected HttpServlet servlet;
+
+  @Override
+  public void index() {
+    if (initialized.compareAndSet(false, true)) {
+      initServlet();
+    }
+    try {
+      /*
+       * This reflection workaround is needed because HttpServlet.doGet is protected
+       * (even if subclasses have it public).
+       */
+      Method doGetMethod =
+          this.servlet.getClass().getMethod("doGet", HttpServletRequest.class, HttpServletResponse.class);
+      doGetMethod.setAccessible(true);
+      doGetMethod.invoke(this.servlet, request(), response());
+    } catch (IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException
+        | SecurityException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Creates a dummy servlet config which is suitable for initializing a servlet instance.
+   * @param servletName
+   * @return a ServletConfig instance initialized with a ServletContext
+   */
+  private ServletConfig getDummyServletConfig(String servletName) {
+    return new ServletConfig() {
+
+      @Override
+      public String getServletName() {
+        return servletName;
+      }
+
+      @Override
+      public ServletContext getServletContext() {
+        return request().getServletContext();
+      }
+
+      @Override
+      public Enumeration<String> getInitParameterNames() {
+        return null;
+      }
+
+      @Override
+      public String getInitParameter(String name) {
+        return null;
+      }
+    };
+  }
+
+  private void initServlet() {
+    try {
+      servlet.init(getDummyServletConfig(this.servlet.getClass().getSimpleName()));
+    } catch (ServletException e) {
+      throw new RuntimeException(e);
+    }
+  }
+}
diff --git a/tez-common/src/main/java/org/apache/tez/common/web/ProfileOutputServlet.java b/tez-common/src/main/java/org/apache/tez/common/web/ProfileOutputServlet.java
new file mode 100644
index 0000000000..2fac77cdc8
--- /dev/null
+++ b/tez-common/src/main/java/org/apache/tez/common/web/ProfileOutputServlet.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.common.web;
+
+import org.apache.hadoop.yarn.webapp.MimeType;
+import org.eclipse.jetty.servlet.DefaultServlet;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+/**
+ * Servlet to serve files generated by {@link ProfileServlet}.
+ */ +public class ProfileOutputServlet extends DefaultServlet { + public static final String FILE_QUERY_PARAM = "file"; + + public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + String queriedFile = request.getParameter(FILE_QUERY_PARAM); + if (queriedFile == null) { + writeMessage(response, "Run the profiler to be able to receive its output"); + return; + } + File outputFile = new File(ProfileServlet.OUTPUT_DIR, queriedFile); + if (!outputFile.exists()) { + writeMessage(response, "Requested file does not exist: " + queriedFile); + return; + } + if (outputFile.length() < 100) { + response.setIntHeader("Refresh", 2); + writeMessage(response, "This page auto-refreshes every 2 seconds until output file is ready..."); + return; + } + response.setContentType(MimeType.HTML); + response.getOutputStream().write(Files.readAllBytes(Paths.get(outputFile.getPath()))); + response.getOutputStream().flush(); + response.getOutputStream().close(); + } + + private void writeMessage(HttpServletResponse response, String message) throws IOException { + response.setContentType(MimeType.TEXT); + PrintWriter out = response.getWriter(); + out.println(message); + out.close(); + } +} diff --git a/tez-common/src/main/java/org/apache/tez/common/web/ProfileServlet.java b/tez-common/src/main/java/org/apache/tez/common/web/ProfileServlet.java new file mode 100644 index 0000000000..2884bbb4d7 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/common/web/ProfileServlet.java @@ -0,0 +1,385 @@ + /** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.common.web; + +import com.google.common.base.Joiner; + +import org.apache.hadoop.http.HttpServer2; +import org.apache.tez.common.TezUtilsInternal; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +/** + *
+ * Servlet that runs async-profiler as web-endpoint.
+ * Following options from async-profiler can be specified as query parameter.
+ * //  -e event          profiling event: cpu|alloc|lock|cache-misses etc.
+ * //  -d duration       run profiling for {@literal <duration>} seconds (integer)
+ * //  -i interval       sampling interval in nanoseconds (long)
+ * //  -j jstackdepth    maximum Java stack depth (integer)
+ * //  -b bufsize        frame buffer size (long)
+ * //  -m method         fully qualified method name: 'ClassName.methodName'
+ * //  -t                profile different threads separately
+ * //  -s                simple class names instead of FQN
+ * //  -o fmt[,fmt...]   output format: summary|traces|flat|collapsed|svg|tree|jfr|html
+ * //  --width px        SVG width pixels (integer)
+ * //  --height px       SVG frame height pixels (integer)
+ * //  --minwidth px     skip frames smaller than px (double)
+ * //  --reverse         generate stack-reversed FlameGraph / Call tree
+ * Example:
+ * - To collect 10 second CPU profile of current process (returns FlameGraph html)
+ * {@literal curl "http://localhost:10002/prof"}
+ * - To collect 1 minute CPU profile of current process and output in tree format (html)
+ * {@literal curl  "http://localhost:10002/prof?output=tree&duration=60"}
+ * - To collect 10 second heap allocation profile of current process (returns FlameGraph html)
+ * {@literal curl "http://localhost:10002/prof?event=alloc"}
+ * - To collect lock contention profile of current process (returns FlameGraph html)
+ * {@literal curl "http://localhost:10002/prof?event=lock"}
+ * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events)
+ * // Perf events:
+ * //    cpu
+ * //    page-faults
+ * //    context-switches
+ * //    cycles
+ * //    instructions
+ * //    cache-references
+ * //    cache-misses
+ * //    branches
+ * //    branch-misses
+ * //    bus-cycles
+ * //    L1-dcache-load-misses
+ * //    LLC-load-misses
+ * //    dTLB-load-misses
+ * //    mem:breakpoint
+ * //    trace:tracepoint
+ * // Java events:
+ * //    alloc
+ * //    lock
+ * 
+ */ +public class ProfileServlet extends HttpServlet { + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(ProfileServlet.class); + private static final String ACCESS_CONTROL_ALLOW_METHODS = "Access-Control-Allow-Methods"; + private static final String ALLOWED_METHODS = "GET"; + private static final String ACCESS_CONTROL_ALLOW_ORIGIN = "Access-Control-Allow-Origin"; + private static final String CONTENT_TYPE_TEXT = "text/plain; charset=utf-8"; + private static final String ASYNC_PROFILER_HOME_ENV = "ASYNC_PROFILER_HOME"; + private static final String ASYNC_PROFILER_HOME_SYSTEM_PROPERTY = "async.profiler.home"; + private static final int DEFAULT_DURATION_SECONDS = 10; + private static final AtomicInteger ID_GEN = new AtomicInteger(0); + public static final String OUTPUT_DIR = System.getProperty("java.io.tmpdir") + "/prof-output"; + + enum Event { + CPU("cpu"), + ALLOC("alloc"), + LOCK("lock"), + PAGE_FAULTS("page-faults"), + CONTEXT_SWITCHES("context-switches"), + CYCLES("cycles"), + INSTRUCTIONS("instructions"), + CACHE_REFERENCES("cache-references"), + CACHE_MISSES("cache-misses"), + BRANCHES("branches"), + BRANCH_MISSES("branch-misses"), + BUS_CYCLES("bus-cycles"), + L1_DCACHE_LOAD_MISSES("L1-dcache-load-misses"), + LLC_LOAD_MISSES("LLC-load-misses"), + DTLB_LOAD_MISSES("dTLB-load-misses"), + MEM_BREAKPOINT("mem:breakpoint"), + TRACE_TRACEPOINT("trace:tracepoint"); + + private final String internalName; + + Event(final String internalName) { + this.internalName = internalName; + } + + public String getInternalName() { + return internalName; + } + + public static Event fromInternalName(final String name) { + for (Event event : values()) { + if (event.getInternalName().equalsIgnoreCase(name)) { + return event; + } + } + return null; + } + } + + enum Output { + SUMMARY, TRACES, FLAT, COLLAPSED, SVG, TREE, JFR, HTML + } + + private final Lock profilerLock = new ReentrantLock(); + private Integer pid; + private String asyncProfilerHome; + private transient Process process; + + public ProfileServlet() { + this.asyncProfilerHome = getAsyncProfilerHome(); + this.pid = TezUtilsInternal.getPid(); + LOG.info("Servlet process PID: {} asyncProfilerHome: {}", pid, asyncProfilerHome); + } + + public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + response.setContentType("text/plain; charset=UTF-8"); + PrintStream out = new PrintStream(response.getOutputStream(), false, "UTF-8"); + if (!HttpServer2.isInstrumentationAccessAllowed(this.getServletContext(), request, response)) { + response.setStatus(HttpServletResponse.SC_UNAUTHORIZED); + setResponseHeader(response); + out.println("Unauthorized: Instrumentation access is not allowed!"); + out.close(); + return; + } + + // make sure async profiler home is set + if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) { + response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + setResponseHeader(response); + out.println("ASYNC_PROFILER_HOME env is not set"); + out.close(); + return; + } + + // if pid is explicitly specified, use it else default to current process + pid = getInteger(request, "pid", pid); + // if pid is not specified in query param and if current process pid cannot be determined + if (pid == null) { + response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + setResponseHeader(response); + out.println("'pid' query parameter unspecified or unable to determine PID of current process."); + 
out.close();
+      return;
+    }
+
+    final int duration = getInteger(request, "duration", DEFAULT_DURATION_SECONDS);
+    final Output output = getOutput(request);
+    final Event event = getEvent(request);
+    final Long interval = getLong(request, "interval");
+    final Integer jstackDepth = getInteger(request, "jstackdepth", null);
+    final Long bufsize = getLong(request, "bufsize");
+    final boolean thread = request.getParameterMap().containsKey("thread");
+    final boolean simple = request.getParameterMap().containsKey("simple");
+    final Integer width = getInteger(request, "width", null);
+    final Integer height = getInteger(request, "height", null);
+    final Double minwidth = getMinWidth(request);
+    final boolean reverse = request.getParameterMap().containsKey("reverse");
+    final String method = request.getParameter("method");
+
+    if (request.getParameter("event") != null && method != null) {
+      response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
+      setResponseHeader(response);
+      response.getWriter().write("Event and method aren't allowed to be both used in the same request.");
+      return;
+    }
+
+    if (process == null || !process.isAlive()) {
+      try {
+        int lockTimeoutSecs = 3;
+        if (profilerLock.tryLock(lockTimeoutSecs, TimeUnit.SECONDS)) {
+          try {
+            File outputFile = new File(OUTPUT_DIR,
+                "async-prof-pid-" + pid + "-" +
+                    (method == null ? event.name().toLowerCase() : method) + "-" + ID_GEN.incrementAndGet() + "." +
+                    output.name().toLowerCase());
+            List<String> cmd = new ArrayList<>();
+            cmd.add(getProfilerScriptPath());
+            cmd.add("-e");
+            cmd.add(method == null ? event.getInternalName() : method);
+            cmd.add("-d");
+            cmd.add("" + duration);
+            cmd.add("-o");
+            cmd.add(output.name().toLowerCase());
+            cmd.add("-f");
+            cmd.add(outputFile.getAbsolutePath());
+            if (interval != null) {
+              cmd.add("-i");
+              cmd.add(interval.toString());
+            }
+            if (jstackDepth != null) {
+              cmd.add("-j");
+              cmd.add(jstackDepth.toString());
+            }
+            if (bufsize != null) {
+              cmd.add("-b");
+              cmd.add(bufsize.toString());
+            }
+            if (thread) {
+              cmd.add("-t");
+            }
+            if (simple) {
+              cmd.add("-s");
+            }
+            if (width != null) {
+              cmd.add("--width");
+              cmd.add(width.toString());
+            }
+            if (height != null) {
+              cmd.add("--height");
+              cmd.add(height.toString());
+            }
+            if (minwidth != null) {
+              cmd.add("--minwidth");
+              cmd.add(minwidth.toString());
+            }
+            if (reverse) {
+              cmd.add("--reverse");
+            }
+            cmd.add(pid.toString());
+            process = new ProcessBuilder(cmd).start();
+
+            // set response and set refresh header to output location
+            setResponseHeader(response);
+            response.setStatus(HttpServletResponse.SC_ACCEPTED);
+            String relativeUrl = "/prof-output";
+            // to avoid auto-refresh by ProfileOutputServlet, refreshDelay can be specified via url param
+            int refreshDelay = getInteger(request, "refreshDelay", 0);
+            // instead of sending redirect, set auto-refresh so that browsers will refresh with redirected url
+            response.setHeader("Refresh", (duration + refreshDelay) + "; URL=" + relativeUrl + '?' +
+                ProfileOutputServlet.FILE_QUERY_PARAM + '=' + outputFile.getName());
+
+            out.println("Profiled PID: " + pid);
+            out.println("Started [" + event.getInternalName() +
+                "] profiling. This page will automatically redirect to " +
+                relativeUrl + " after " + duration + " seconds.\n\ncommand:\n" + Joiner.on(" ").join(cmd));
+            out.flush();
+          } finally {
+            profilerLock.unlock();
+          }
+        } else {
+          setResponseHeader(response);
+          response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+          out.println("Unable to acquire lock.
Another instance of profiler might be running."); + LOG.warn("Unable to acquire lock in {} seconds. Another instance of profiler might be running.", + lockTimeoutSecs); + } + } catch (InterruptedException e) { + LOG.warn("Interrupted while acquiring profile lock.", e); + response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + } + } else { + setResponseHeader(response); + response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + out.println("Another instance of profiler is already running."); + } + out.close(); + } + + /** + * Get the path of the profiler script to be executed. + * Before async-profiler 3.0, the script was named profiler.sh, and after 3.0 it's bin/asprof + * @return + */ + private String getProfilerScriptPath() { + Path defaultPath = Paths.get(asyncProfilerHome + "/bin/asprof"); + return Files.exists(defaultPath)? defaultPath.toString() : asyncProfilerHome + "/profiler.sh"; + } + + private Integer getInteger(final HttpServletRequest req, final String param, final Integer defaultValue) { + final String value = req.getParameter(param); + if (value != null) { + try { + return Integer.valueOf(value); + } catch (NumberFormatException e) { + return defaultValue; + } + } + return defaultValue; + } + + private Long getLong(final HttpServletRequest req, final String param) { + final String value = req.getParameter(param); + if (value != null) { + try { + return Long.valueOf(value); + } catch (NumberFormatException e) { + return null; + } + } + return null; + } + + private Double getMinWidth(final HttpServletRequest req) { + final String value = req.getParameter("minwidth"); + if (value != null) { + try { + return Double.valueOf(value); + } catch (NumberFormatException e) { + return null; + } + } + return null; + } + + private Event getEvent(final HttpServletRequest req) { + final String eventArg = req.getParameter("event"); + if (eventArg != null) { + Event event = Event.fromInternalName(eventArg); + return event == null ? Event.CPU : event; + } + return Event.CPU; + } + + private Output getOutput(final HttpServletRequest req) { + final String outputArg = req.getParameter("output"); + if (outputArg != null) { + try { + return Output.valueOf(outputArg.trim().toUpperCase()); + } catch (IllegalArgumentException e) { + LOG.warn("Output format value is invalid, returning with default HTML"); + return Output.HTML; + } + } + return Output.HTML; + } + + private void setResponseHeader(final HttpServletResponse response) { + response.setHeader(ACCESS_CONTROL_ALLOW_METHODS, ALLOWED_METHODS); + response.setHeader(ACCESS_CONTROL_ALLOW_ORIGIN, "*"); + response.setContentType(CONTENT_TYPE_TEXT); + } + + public static String getAsyncProfilerHome() { + String asyncProfilerHome = System.getenv(ASYNC_PROFILER_HOME_ENV); + // if ENV is not set, see if -Dasync.profiler.home=/path/to/async/profiler/home is set + if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) { + asyncProfilerHome = System.getProperty(ASYNC_PROFILER_HOME_SYSTEM_PROPERTY); + } + return asyncProfilerHome; + } +} diff --git a/tez-common/src/main/java/org/apache/tez/common/web/ServletToControllerAdapters.java b/tez-common/src/main/java/org/apache/tez/common/web/ServletToControllerAdapters.java new file mode 100644 index 0000000000..304e9a9118 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/common/web/ServletToControllerAdapters.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.common.web; + +import javax.servlet.ServletException; + +import org.apache.hadoop.conf.ConfServlet; +import org.apache.hadoop.http.HttpServer2.StackServlet; +import org.apache.hadoop.jmx.JMXJsonServlet; + +public class ServletToControllerAdapters { + public static class JMXJsonServletController extends AbstractServletToControllerAdapter { + public JMXJsonServletController() throws ServletException { + this.servlet = new JMXJsonServlet(); + } + } + + public static class ConfServletController extends AbstractServletToControllerAdapter { + public ConfServletController() throws ServletException { + this.servlet = new ConfServlet(); + } + } + + public static class StackServletController extends AbstractServletToControllerAdapter { + public StackServletController() throws ServletException { + this.servlet = new StackServlet(); + } + } + + public static class ProfileServletController extends AbstractServletToControllerAdapter { + public ProfileServletController() throws ServletException { + this.servlet = new ProfileServlet(); + } + } + + public static class ProfileOutputServletController extends AbstractServletToControllerAdapter { + public ProfileOutputServletController() throws ServletException { + this.servlet = new ProfileOutputServlet(); + } + } + +} diff --git a/tez-common/src/main/java/org/apache/tez/common/web/package-info.java b/tez-common/src/main/java/org/apache/tez/common/web/package-info.java new file mode 100644 index 0000000000..2fbda31fda --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/common/web/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +@Private +package org.apache.tez.common.web; + +import org.apache.hadoop.classification.InterfaceAudience.Private; \ No newline at end of file diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/DAGIDAware.java b/tez-common/src/main/java/org/apache/tez/dag/records/DAGIDAware.java new file mode 100644 index 0000000000..1234a30053 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/dag/records/DAGIDAware.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.records; + +import org.apache.hadoop.yarn.api.records.ApplicationId; + +public interface DAGIDAware { + TezDAGID getDAGID(); + + default ApplicationId getApplicationId() { + return getDAGID().getApplicationId(); + } +} diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptIDAware.java b/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptIDAware.java new file mode 100644 index 0000000000..924fd07109 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptIDAware.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.records; + +public interface TaskAttemptIDAware extends TaskIDAware { + TezTaskAttemptID getTaskAttemptID(); + + @Override + default TezTaskID getTaskID() { + return getTaskAttemptID().getTaskID(); + } +} diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java b/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java index 14eaa3a8db..9c99af0066 100644 --- a/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java +++ b/tez-common/src/main/java/org/apache/tez/dag/records/TaskAttemptTerminationCause.java @@ -20,7 +20,7 @@ public enum TaskAttemptTerminationCause { UNKNOWN_ERROR, // The error cause is unknown. 
Usually means a gap in error propagation - + TERMINATED_BY_CLIENT, // Killed by client command TERMINATED_AT_SHUTDOWN, // Killed due execution shutdown TERMINATED_AT_RECOVERY, // Killed in recovery, due to can not recover running task attempt @@ -29,7 +29,7 @@ public enum TaskAttemptTerminationCause { TERMINATED_INEFFECTIVE_SPECULATION, // Killed speculative attempt because original succeeded TERMINATED_EFFECTIVE_SPECULATION, // Killed original attempt because speculation succeeded TERMINATED_ORPHANED, // Attempt is no longer needed by the task - + APPLICATION_ERROR, // Failed due to application code error FRAMEWORK_ERROR, // Failed due to code error in Tez code INPUT_READ_ERROR, // Failed due to error in reading inputs @@ -37,12 +37,12 @@ public enum TaskAttemptTerminationCause { OUTPUT_LOST, // Failed because attempts output were reported lost NO_PROGRESS, // Failed because no progress was being made TASK_HEARTBEAT_ERROR, // Failed because AM lost connection to the task - + CONTAINER_LAUNCH_FAILED, // Failed to launch container CONTAINER_EXITED, // Container exited. Indicates gap in specific error propagation from the cluster CONTAINER_STOPPED, // Container stopped or released by Tez NODE_FAILED, // Node for the container failed - NODE_DISK_ERROR, // Disk failed on the node runnign the task + NODE_DISK_ERROR, // Disk failed on the node running the task COMMUNICATION_ERROR, // Equivalent to a launch failure SERVICE_BUSY, // Service rejected the task diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TaskIDAware.java b/tez-common/src/main/java/org/apache/tez/dag/records/TaskIDAware.java new file mode 100644 index 0000000000..0bee45dfa1 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/dag/records/TaskIDAware.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tez.dag.records; + +public interface TaskIDAware extends VertexIDAware { + TezTaskID getTaskID(); + + @Override + default TezVertexID getVertexID() { + return getTaskID().getVertexID(); + } +} diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java b/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java index 2e3309e4dc..8e6a7b65d9 100644 --- a/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java +++ b/tez-common/src/main/java/org/apache/tez/dag/records/TezDAGID.java @@ -21,13 +21,15 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Objects; -import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.yarn.api.records.ApplicationId; -import com.google.common.base.Preconditions; import org.apache.tez.util.FastNumberFormat; +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; + /** * TezDAGID represents the immutable and unique identifier for * a Tez DAG. @@ -40,41 +42,38 @@ */ public class TezDAGID extends TezID { - private static TezIDCache tezDAGIDCache = new TezIDCache<>(); + private static Interner tezDAGIDCache = Interners.newWeakInterner(); private ApplicationId applicationId; /** * Get a DAGID object from given {@link ApplicationId}. * @param applicationId Application that this dag belongs to * @param id the dag number + * @throws NullPointerException if {@code applicationId} is {@code null} */ public static TezDAGID getInstance(ApplicationId applicationId, int id) { // The newly created TezDAGIds are primarily for their hashCode method, and // will be short-lived. // Alternately the cache can be keyed by the hash of the incoming paramters. - Preconditions.checkArgument(applicationId != null, "ApplicationID cannot be null"); - return tezDAGIDCache.getInstance(new TezDAGID(applicationId, id)); + Objects.requireNonNull(applicationId, "ApplicationID cannot be null"); + return tezDAGIDCache.intern(new TezDAGID(applicationId, id)); } - @InterfaceAudience.Private - public static void clearCache() { - tezDAGIDCache.clear(); - } - /** * Get a DAGID object from given parts. * @param yarnRMIdentifier YARN RM identifier * @param appId application number * @param id the dag number + * @throws NullPointerException if {@code yarnRMIdentifier} is {@code null} */ public static TezDAGID getInstance(String yarnRMIdentifier, int appId, int id) { // The newly created TezDAGIds are primarily for their hashCode method, and // will be short-lived. - // Alternately the cache can be keyed by the hash of the incoming paramters. - Preconditions.checkArgument(yarnRMIdentifier != null, "yarnRMIdentifier cannot be null"); - return tezDAGIDCache.getInstance(new TezDAGID(yarnRMIdentifier, appId, id)); + // Alternately the cache can be keyed by the hash of the incoming parameters. + Objects.requireNonNull(yarnRMIdentifier, "yarnRMIdentifier cannot be null"); + return tezDAGIDCache.intern(new TezDAGID(yarnRMIdentifier, appId, id)); } - + // Public for Writable serialization. Verify if this is actually required. 
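For reviewers less familiar with Guava interners, the switch from the hand-rolled TezIDCache to Interners.newWeakInterner() relies on two properties: equal values are canonicalized to a single instance, and the canonical entries are held weakly. A minimal sketch of that contract (the demo class and string values are hypothetical, not part of this patch):

    import com.google.common.collect.Interner;
    import com.google.common.collect.Interners;

    public class InternerSketch {
      public static void main(String[] args) {
        Interner<String> interner = Interners.newWeakInterner();
        String a = interner.intern(new String("dag_0001"));
        String b = interner.intern(new String("dag_0001"));
        // Equal values intern to the same canonical instance.
        System.out.println(a == b); // true
        // Entries are weakly referenced, so unreferenced IDs can be garbage
        // collected, which is why the explicit clearCache() methods go away.
      }
    }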
public TezDAGID() { } @@ -84,7 +83,7 @@ private TezDAGID(ApplicationId applicationId, int id) { this.applicationId = applicationId; } - + private TezDAGID(String yarnRMIdentifier, int appId, int id) { this(ApplicationId.newInstance(Long.parseLong(yarnRMIdentifier), appId), id); @@ -128,7 +127,7 @@ public static TezDAGID readTezDAGID(DataInput in) throws IOException { TezDAGID dagID = getInstance(ApplicationId.newInstance(clusterId, appId), dagIdInt); return dagID; } - + @Override public void write(DataOutput out) throws IOException { out.writeLong(applicationId.getClusterTimestamp()); diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TezID.java b/tez-common/src/main/java/org/apache/tez/dag/records/TezID.java index cd7b27de45..7efbd9a889 100644 --- a/tez-common/src/main/java/org/apache/tez/dag/records/TezID.java +++ b/tez-common/src/main/java/org/apache/tez/dag/records/TezID.java @@ -21,8 +21,6 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.lang.ref.WeakReference; -import java.util.WeakHashMap; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -42,25 +40,6 @@ public abstract class TezID implements WritableComparable { public static final char SEPARATOR = '_'; protected int id; - public static class TezIDCache { - private final WeakHashMap> cache = new WeakHashMap<>(); - - synchronized T getInstance(final T id) { - final WeakReference cached = cache.get(id); - if (cached != null) { - final T value = cached.get(); - if (value != null) - return value; - } - cache.put(id, new WeakReference(id)); - return id; - } - - synchronized void clear() { - cache.clear(); - } - } - /** constructs an ID object from the given int */ public TezID(int id) { this.id = id; diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TezTaskAttemptID.java b/tez-common/src/main/java/org/apache/tez/dag/records/TezTaskAttemptID.java index 7aee80f4d6..fe2b84449f 100644 --- a/tez-common/src/main/java/org/apache/tez/dag/records/TezTaskAttemptID.java +++ b/tez-common/src/main/java/org/apache/tez/dag/records/TezTaskAttemptID.java @@ -21,10 +21,14 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; + /** * TezTaskAttemptID represents the immutable and unique identifier for * a task attempt. Each task attempt is one particular instance of a Tez Task @@ -42,11 +46,11 @@ */ @InterfaceAudience.Public @InterfaceStability.Stable -public class TezTaskAttemptID extends TezID { +public class TezTaskAttemptID extends TezID implements TaskIDAware { public static final String ATTEMPT = "attempt"; private TezTaskID taskId; - private static TezIDCache tezTaskAttemptIDCache = new TezIDCache<>(); + private static Interner tezTaskAttemptIDCache = Interners.newWeakInterner(); // Public for Writable serialization. Verify if this is actually required. public TezTaskAttemptID() { @@ -56,25 +60,20 @@ public TezTaskAttemptID() { * Constructs a TaskAttemptID object from given {@link TezTaskID}. 
* @param taskID TaskID that this task belongs to * @param id the task attempt number + * @throws NullPointerException if {@code taskID} is {@code null} */ public static TezTaskAttemptID getInstance(TezTaskID taskID, int id) { - return tezTaskAttemptIDCache.getInstance(new TezTaskAttemptID(taskID, id)); - } - - @InterfaceAudience.Private - public static void clearCache() { - tezTaskAttemptIDCache.clear(); + Objects.requireNonNull(taskID); + return tezTaskAttemptIDCache.intern(new TezTaskAttemptID(taskID, id)); } private TezTaskAttemptID(TezTaskID taskId, int id) { super(id); - if(taskId == null) { - throw new IllegalArgumentException("taskId cannot be null"); - } this.taskId = taskId; } /** Returns the {@link TezTaskID} object that this task attempt belongs to */ + @Override public TezTaskID getTaskID() { return taskId; } diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TezTaskID.java b/tez-common/src/main/java/org/apache/tez/dag/records/TezTaskID.java index 3295f6a707..08310f3dfc 100644 --- a/tez-common/src/main/java/org/apache/tez/dag/records/TezTaskID.java +++ b/tez-common/src/main/java/org/apache/tez/dag/records/TezTaskID.java @@ -21,13 +21,16 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.base.Preconditions; import org.apache.tez.util.FastNumberFormat; +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; + /** * TaskID represents the immutable and unique identifier for * a Tez Task. Each TaskID encompasses multiple attempts made to @@ -38,7 +41,7 @@ */ @InterfaceAudience.Public @InterfaceStability.Stable -public class TezTaskID extends TezID { +public class TezTaskID extends TezID implements VertexIDAware { public static final String TASK = "task"; private final int serializingHash; @@ -51,27 +54,22 @@ public FastNumberFormat initialValue() { } }; - private static TezIDCache tezTaskIDCache = new TezIDCache<>(); + private static Interner tezTaskIDCache = Interners.newWeakInterner(); private TezVertexID vertexId; /** * Constructs a TezTaskID object from given {@link TezVertexID}. 
* @param vertexID the vertexID object for this TezTaskID * @param id the tip number + * @throws NullPointerException if {@code vertexID} is {@code null} */ public static TezTaskID getInstance(TezVertexID vertexID, int id) { - Preconditions.checkArgument(vertexID != null, "vertexID cannot be null"); - return tezTaskIDCache.getInstance(new TezTaskID(vertexID, id)); - } - - @InterfaceAudience.Private - public static void clearCache() { - tezTaskIDCache.clear(); + Objects.requireNonNull(vertexID, "vertexID cannot be null"); + return tezTaskIDCache.intern(new TezTaskID(vertexID, id)); } private TezTaskID(TezVertexID vertexID, int id) { super(id); - Preconditions.checkArgument(vertexID != null, "vertexID cannot be null"); this.vertexId = vertexID; this.serializingHash = getHashCode(true); } @@ -81,6 +79,7 @@ public int getSerializingHash() { } /** Returns the {@link TezVertexID} object that this task belongs to */ + @Override public TezVertexID getVertexID() { return vertexId; } diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/TezVertexID.java b/tez-common/src/main/java/org/apache/tez/dag/records/TezVertexID.java index b56c9adfa5..e428317237 100644 --- a/tez-common/src/main/java/org/apache/tez/dag/records/TezVertexID.java +++ b/tez-common/src/main/java/org/apache/tez/dag/records/TezVertexID.java @@ -21,18 +21,21 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import com.google.common.base.Preconditions; import org.apache.tez.util.FastNumberFormat; +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; + /** * TezVertexID represents the immutable and unique identifier for * a Vertex in a Tez DAG. Each TezVertexID encompasses multiple Tez Tasks. * - * TezVertezID consists of 2 parts. The first part is the {@link TezDAGID}, + * TezVertexID consists of 2 parts. The first part is the {@link TezDAGID}, * that is the Tez DAG that this vertex belongs to. The second part is * the vertex number. * @@ -41,7 +44,7 @@ */ @InterfaceAudience.Public @InterfaceStability.Stable -public class TezVertexID extends TezID { +public class TezVertexID extends TezID implements DAGIDAware { public static final String VERTEX = "vertex"; static final ThreadLocal tezVertexIdFormat = new ThreadLocal() { @@ -53,7 +56,7 @@ public FastNumberFormat initialValue() { } }; - private static TezIDCache tezVertexIDCache = new TezIDCache<>(); + private static Interner tezVertexIDCache = Interners.newWeakInterner(); private TezDAGID dagId; // Public for Writable serialization. Verify if this is actually required. @@ -64,15 +67,11 @@ public TezVertexID() { * Constructs a TezVertexID object from given {@link TezDAGID}. 
* @param dagId TezDAGID object for this TezVertexID * @param id the tip number + * @throws NullPointerException if {@code dagId} is {@code null} */ public static TezVertexID getInstance(TezDAGID dagId, int id) { - Preconditions.checkArgument(dagId != null, "DagID cannot be null"); - return tezVertexIDCache.getInstance(new TezVertexID(dagId, id)); - } - - @InterfaceAudience.Private - public static void clearCache() { - tezVertexIDCache.clear(); + Objects.requireNonNull(dagId, "DagID cannot be null"); + return tezVertexIDCache.intern(new TezVertexID(dagId, id)); } private TezVertexID(TezDAGID dagId, int id) { @@ -81,7 +80,8 @@ private TezVertexID(TezDAGID dagId, int id) { } /** Returns the {@link TezDAGID} object that this tip belongs to */ - public TezDAGID getDAGId() { + @Override + public TezDAGID getDAGID() { return dagId; } @@ -113,7 +113,7 @@ public void readFields(DataInput in) throws IOException { dagId = TezDAGID.readTezDAGID(in); super.readFields(in); } - + public static TezVertexID readTezVertexID(DataInput in) throws IOException { TezDAGID dagID = TezDAGID.readTezDAGID(in); int vertexIdInt = TezID.readID(in); @@ -160,5 +160,4 @@ public static TezVertexID fromString(String vertexIdStr) { } return null; } - } diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/VertexIDAware.java b/tez-common/src/main/java/org/apache/tez/dag/records/VertexIDAware.java new file mode 100644 index 0000000000..01bbe859b6 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/dag/records/VertexIDAware.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
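The new *IDAware interfaces form a chain (TaskAttemptIDAware -> TaskIDAware -> VertexIDAware -> DAGIDAware), so an implementor only supplies its own ID and inherits the parent lookups through default methods. A minimal sketch under that assumption (TaskInfo is a hypothetical class, not part of this patch):

    import org.apache.tez.dag.records.TaskIDAware;
    import org.apache.tez.dag.records.TezTaskID;

    // Implementing TaskIDAware derives getVertexID(), getDAGID() and
    // getApplicationId() for free from the default methods.
    public class TaskInfo implements TaskIDAware {
      private final TezTaskID taskId;

      public TaskInfo(TezTaskID taskId) {
        this.taskId = taskId;
      }

      @Override
      public TezTaskID getTaskID() {
        return taskId;
      }
    }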
+ */ + +package org.apache.tez.dag.records; + +public interface VertexIDAware extends DAGIDAware { + TezVertexID getVertexID(); + + @Override + default TezDAGID getDAGID() { + return getVertexID().getDAGID(); + } +} diff --git a/tez-common/src/main/java/org/apache/tez/dag/records/VertexIdentifierImpl.java b/tez-common/src/main/java/org/apache/tez/dag/records/VertexIdentifierImpl.java index 4480f742fc..83b503203c 100644 --- a/tez-common/src/main/java/org/apache/tez/dag/records/VertexIdentifierImpl.java +++ b/tez-common/src/main/java/org/apache/tez/dag/records/VertexIdentifierImpl.java @@ -30,7 +30,7 @@ public class VertexIdentifierImpl implements VertexIdentifier { public VertexIdentifierImpl(String dagName, String vertexName, TezVertexID vertexId) { this.vertexId = vertexId; this.vertexName = vertexName; - this.dagIdentifier = new DagIdentifierImpl(dagName, vertexId.getDAGId()); + this.dagIdentifier = new DagIdentifierImpl(dagName, vertexId.getDAGID()); } @Override diff --git a/tez-common/src/main/java/org/apache/tez/dag/utils/RelocalizationUtils.java b/tez-common/src/main/java/org/apache/tez/dag/utils/RelocalizationUtils.java index 84a9474a39..9ccfc76be7 100644 --- a/tez-common/src/main/java/org/apache/tez/dag/utils/RelocalizationUtils.java +++ b/tez-common/src/main/java/org/apache/tez/dag/utils/RelocalizationUtils.java @@ -31,15 +31,16 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.tez.common.ReflectionUtils; -import org.apache.tez.dag.api.TezException; import com.google.common.collect.Lists; @InterfaceAudience.Private -public class RelocalizationUtils { +public final class RelocalizationUtils { + + private RelocalizationUtils() {} public static List processAdditionalResources(Map additionalResources, - Configuration conf, String destDir) throws IOException, TezException { + Configuration conf, String destDir) throws IOException { if (additionalResources == null || additionalResources.isEmpty()) { return Collections.emptyList(); } diff --git a/tez-common/src/main/java/org/apache/tez/runtime/common/resources/InitialMemoryRequestContext.java b/tez-common/src/main/java/org/apache/tez/runtime/common/resources/InitialMemoryRequestContext.java index aec3795435..da361a1e59 100644 --- a/tez-common/src/main/java/org/apache/tez/runtime/common/resources/InitialMemoryRequestContext.java +++ b/tez-common/src/main/java/org/apache/tez/runtime/common/resources/InitialMemoryRequestContext.java @@ -18,7 +18,8 @@ package org.apache.tez.runtime.common.resources; -import com.google.common.base.Preconditions; +import java.util.Objects; + import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -38,9 +39,9 @@ public static enum ComponentType { public InitialMemoryRequestContext(long requestedSize, String componentClassName, ComponentType componentType, String componentVertexName) { - Preconditions.checkNotNull(componentClassName, "componentClassName is null"); - Preconditions.checkNotNull(componentType, "componentType is null"); - Preconditions.checkNotNull(componentVertexName, "componentVertexName is null"); + Objects.requireNonNull(componentClassName, "componentClassName is null"); + Objects.requireNonNull(componentType, "componentType is null"); + Objects.requireNonNull(componentVertexName, "componentVertexName is null"); this.requestedSize = requestedSize; this.componentClassName = componentClassName; this.componentType = componentType; diff --git a/tez-common/src/main/java/org/apache/tez/runtime/hook/TezDAGHook.java 
b/tez-common/src/main/java/org/apache/tez/runtime/hook/TezDAGHook.java new file mode 100644 index 0000000000..7fb015bdb1 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/runtime/hook/TezDAGHook.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.runtime.hook; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.dag.records.TezDAGID; + +/** + * A hook which is instantiated and triggered before and after a DAG is executed. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface TezDAGHook { + /** + * Invoked before the DAG starts. + * + * @param id the DAG id + * @param conf the conf + */ + void start(TezDAGID id, Configuration conf); + + /** + * Invoked after the DAG finishes. + */ + void stop(); +} diff --git a/tez-common/src/main/java/org/apache/tez/runtime/hook/TezTaskAttemptHook.java b/tez-common/src/main/java/org/apache/tez/runtime/hook/TezTaskAttemptHook.java new file mode 100644 index 0000000000..54931b64d5 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/runtime/hook/TezTaskAttemptHook.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.runtime.hook; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.dag.records.TezTaskAttemptID; + +/** + * A hook which is instantiated and triggered before and after a task attempt is executed. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface TezTaskAttemptHook { + /** + * Invoked before the task attempt starts. + * + * @param id the task attempt id + * @param conf the conf + */ + void start(TezTaskAttemptID id, Configuration conf); + + /** + * Invoked after the task attempt finishes. 
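Both hook interfaces follow the same start/stop contract. A minimal sketch of an implementation (LoggingAttemptHook is a hypothetical class, not part of this patch):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.tez.dag.records.TezTaskAttemptID;
    import org.apache.tez.runtime.hook.TezTaskAttemptHook;

    // Logs the attempt lifecycle; shown only to illustrate the contract.
    public class LoggingAttemptHook implements TezTaskAttemptHook {
      @Override
      public void start(TezTaskAttemptID id, Configuration conf) {
        System.out.println("attempt started: " + id);
      }

      @Override
      public void stop() {
        System.out.println("attempt finished");
      }
    }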
+ */ + void stop(); +} diff --git a/tez-common/src/main/java/org/apache/tez/runtime/hook/package-info.java b/tez-common/src/main/java/org/apache/tez/runtime/hook/package-info.java new file mode 100644 index 0000000000..d977897d86 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/runtime/hook/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@Private +package org.apache.tez.runtime.hook; + +import org.apache.hadoop.classification.InterfaceAudience.Private; \ No newline at end of file diff --git a/tez-common/src/main/java/org/apache/tez/util/LoggingUtils.java b/tez-common/src/main/java/org/apache/tez/util/LoggingUtils.java new file mode 100644 index 0000000000..57575170c6 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/util/LoggingUtils.java @@ -0,0 +1,153 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.tez.util; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.Arrays; +import java.util.Hashtable; + +import org.apache.hadoop.conf.Configuration; +import org.apache.log4j.helpers.ThreadLocalMap; +import org.apache.tez.dag.api.TezConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class LoggingUtils { + private static final Logger LOG = LoggerFactory.getLogger(LoggingUtils.class); + + private LoggingUtils() {} + + @SuppressWarnings("unchecked") + public static void initLoggingContext(ThreadLocalMap threadLocalMap, Configuration conf, + String dagId, String taskAttemptId) { + Hashtable data = (Hashtable) threadLocalMap.get(); + if (data == null) { + data = new NonClonableHashtable(); + threadLocalMap.set(data); + } + data.put("dagId", dagId == null ? "" : dagId); + data.put("taskAttemptId", taskAttemptId == null ? 
"" : taskAttemptId); + + String[] mdcKeys = conf.getStrings(TezConfiguration.TEZ_MDC_CUSTOM_KEYS); + + if (mdcKeys == null || mdcKeys.length == 0) { + return; + } + + String[] mdcKeysValuesFrom = conf.getStrings(TezConfiguration.TEZ_MDC_CUSTOM_KEYS_CONF_PROPS); + LOG.info("MDC_LOGGING: setting up MDC keys: keys: {} / conf: {}", Arrays.asList(mdcKeys), + Arrays.asList(mdcKeysValuesFrom)); + + int i = 0; + for (String mdcKey : mdcKeys) { + // don't want to fail on incorrect mdc key settings, but warn in app logs + if (mdcKey.isEmpty() || mdcKeysValuesFrom.length < i + 1) { + LOG.warn("cannot set mdc key: {}", mdcKey); + break; + } + + String mdcValue = mdcKeysValuesFrom[i] == null ? "" : conf.get(mdcKeysValuesFrom[i]); + // MDC is backed by a Hashtable, let's prevent NPE because of null values + if (mdcValue != null) { + data.put(mdcKey, mdcValue); + } else { + LOG.warn("MDC_LOGGING: mdc value is null for key: {}, config key: {}", mdcKey, + mdcKeysValuesFrom[i]); + } + + i++; + } + } + + public static String getPatternForAM(Configuration conf) { + String pattern = + conf.get(TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_AM, TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_DEFAULT); + return pattern.isEmpty() ? null : pattern; + } + + public static String getPatternForTask(Configuration conf) { + String pattern = + conf.get(TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_TASK, TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_DEFAULT); + return pattern.isEmpty() ? null : pattern; + } + + /** + * This method is for setting a NonClonableHashtable into log4j's mdc. Reflection hacks are + * needed, because MDC.mdc is well protected (final static MDC mdc = new MDC();). The logic below + * is supposed to be called once per JVM, so it's not a subject to performance bottlenecks. For + * further details of this solution, please check NonClonableHashtable class, which is set into + * the ThreadLocalMap. A wrong outcome of this method (any kind of runtime/reflection problems) + * should not affect the DAGAppMaster/TezChild. In case of an exception a ThreadLocalMap is + * returned, but it won't affect the content of the MDC. + */ + @SuppressWarnings("unchecked") + public static ThreadLocalMap setupLog4j() { + ThreadLocalMap mdcContext = new ThreadLocalMap(); + mdcContext.set(new NonClonableHashtable()); + + try { + final Constructor[] constructors = org.apache.log4j.MDC.class.getDeclaredConstructors(); + for (Constructor c : constructors) { + c.setAccessible(true); + } + + org.apache.log4j.MDC mdc = (org.apache.log4j.MDC) constructors[0].newInstance(); + Field tlmField = org.apache.log4j.MDC.class.getDeclaredField("tlm"); + tlmField.setAccessible(true); + tlmField.set(mdc, mdcContext); + + Field mdcField = org.apache.log4j.MDC.class.getDeclaredField("mdc"); + mdcField.setAccessible(true); + + Field modifiers = Field.class.getDeclaredField("modifiers"); + modifiers.setAccessible(true); + modifiers.setInt(mdcField, mdcField.getModifiers() & ~Modifier.FINAL); + + mdcField.set(null, mdc); + + } catch (Exception e) { + if (LOG.isDebugEnabled()) { + LOG.debug("Cannot set log4j global MDC, mdcContext won't be applied to log4j's MDC class", e); + } else { + LOG.warn("Cannot set log4j global MDC, mdcContext won't be applied to log4j's MDC class: {}", e.getMessage()); + } + } + + return mdcContext; + } + + /** + * NonClonableHashtable is a special class for hacking the log4j MDC context. 
By design, log4j's + * MDC uses a ThreadLocalMap, which clones the parent thread's context before propagating it to a child + * thread (see {@link org.apache.log4j.helpers.ThreadLocalMap#childValue()}). In our + * use case, this is not suitable, as we want to maintain only one context globally (and set e.g. + * dagId, taskAttemptId), then update it as easily as possible when the dag/taskattempt changes, without + * having to propagate the updates to all the threads in the JVM. + */ + private static class NonClonableHashtable extends Hashtable { + private static final long serialVersionUID = 1L; + + @Override + public synchronized Object clone() { + return this; + } + } +} diff --git a/tez-common/src/main/java/org/apache/tez/util/StringInterner.java b/tez-common/src/main/java/org/apache/tez/util/StringInterner.java new file mode 100644 index 0000000000..b8c911307c --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/util/StringInterner.java @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.util; + +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; + +/** + * A class to replace {@code String.intern()}. {@code String.intern()} + * has some well-known performance limitations and should generally be avoided. + * Prefer Google's interner over the JDK's implementation. + */ +public final class StringInterner { + + private static final Interner STRING_INTERNER = + Interners.newWeakInterner(); + + private StringInterner() { + } + + public static String intern(final String str) { + return (str == null) ? null : STRING_INTERNER.intern(str); + } +} diff --git a/tez-common/src/main/java/org/apache/tez/util/TezRuntimeShutdownHandler.java b/tez-common/src/main/java/org/apache/tez/util/TezRuntimeShutdownHandler.java new file mode 100644 index 0000000000..4881e08ab9 --- /dev/null +++ b/tez-common/src/main/java/org/apache/tez/util/TezRuntimeShutdownHandler.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.util; + +import java.util.ArrayList; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class TezRuntimeShutdownHandler { + private static final Logger LOG = LoggerFactory.getLogger(TezRuntimeShutdownHandler.class); + + private static final List shutdownTasks = new ArrayList<>(); + + private TezRuntimeShutdownHandler() { + } + + public static void addShutdownTask(Runnable r) { + shutdownTasks.add(r); + } + + public static synchronized void shutdown() { + LOG.info("Handling {} shutdown tasks", shutdownTasks.size()); + for (Runnable shutdownTask : shutdownTasks) { + shutdownTask.run(); + } + } +} diff --git a/tez-common/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-common/src/main/javadoc/resources/META-INF/LICENSE similarity index 100% rename from tez-common/src/main/javadoc/resources/META-INF/LICENSE.txt rename to tez-common/src/main/javadoc/resources/META-INF/LICENSE diff --git a/tez-common/src/main/javadoc/resources/META-INF/NOTICE b/tez-common/src/main/javadoc/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-common/src/main/javadoc/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-common/src/main/javadoc/resources/META-INF/NOTICE.txt b/tez-common/src/main/javadoc/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-common/src/main/javadoc/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-common/src/main/resources/META-INF/LICENSE.txt b/tez-common/src/main/resources/META-INF/LICENSE similarity index 100% rename from tez-common/src/main/resources/META-INF/LICENSE.txt rename to tez-common/src/main/resources/META-INF/LICENSE diff --git a/tez-common/src/main/resources/META-INF/NOTICE b/tez-common/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-common/src/main/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-common/src/main/resources/META-INF/NOTICE.txt b/tez-common/src/main/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-common/src/main/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
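TezRuntimeShutdownHandler collects cleanup tasks during execution and runs them in registration order on teardown. A minimal usage sketch (ShutdownDemo is a hypothetical class, not part of this patch):

    import org.apache.tez.util.TezRuntimeShutdownHandler;

    public class ShutdownDemo {
      public static void main(String[] args) {
        TezRuntimeShutdownHandler.addShutdownTask(
            () -> System.out.println("releasing runtime resources"));
        // Later, during runtime teardown:
        TezRuntimeShutdownHandler.shutdown();
      }
    }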
- diff --git a/tez-common/src/test/java/org/apache/tez/common/MockDNSToSwitchMapping.java b/tez-common/src/test/java/org/apache/tez/common/MockDNSToSwitchMapping.java index ec6881f0ae..afcd687356 100644 --- a/tez-common/src/test/java/org/apache/tez/common/MockDNSToSwitchMapping.java +++ b/tez-common/src/test/java/org/apache/tez/common/MockDNSToSwitchMapping.java @@ -19,7 +19,10 @@ package org.apache.tez.common; import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -32,6 +35,8 @@ * Overrides CachedDNSToSwitchMapping to ensure that it does not try to resolve hostnames */ public class MockDNSToSwitchMapping extends CachedDNSToSwitchMapping implements DNSToSwitchMapping { + private static final Map rackMap = + Collections.synchronizedMap(new HashMap()); private final String defaultRack = "/default-rack"; @@ -43,7 +48,11 @@ public MockDNSToSwitchMapping() { public List resolve(List strings) { List resolvedHosts = new ArrayList(); for (String h : strings) { - resolvedHosts.add(defaultRack); + String rack = rackMap.get(h); + if (rack == null) { + rack = defaultRack; + } + resolvedHosts.add(rack); } return resolvedHosts; } @@ -62,4 +71,7 @@ public static void initializeMockRackResolver() { RackResolver.init(rackResolverConf); } + public static void addRackMapping(String host, String rack) { + rackMap.put(host, rack); + } } diff --git a/tez-common/src/test/java/org/apache/tez/common/TestEnvironmentUpdateUtils.java b/tez-common/src/test/java/org/apache/tez/common/TestEnvironmentUpdateUtils.java deleted file mode 100644 index a9cecc216a..0000000000 --- a/tez-common/src/test/java/org/apache/tez/common/TestEnvironmentUpdateUtils.java +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.tez.common; - -import static org.junit.Assert.assertEquals; - -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListeningExecutorService; -import com.google.common.util.concurrent.MoreExecutors; -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - -public class TestEnvironmentUpdateUtils { - - @Test(timeout = 5000) - public void testMultipleUpdateEnvironment() { - EnvironmentUpdateUtils.put("test.environment1", "test.value1"); - EnvironmentUpdateUtils.put("test.environment2", "test.value2"); - assertEquals("Environment was not set propertly", "test.value1", System.getenv("test.environment1")); - assertEquals("Environment was not set propertly", "test.value2", System.getenv("test.environment2")); - } - - @Test(timeout = 5000) - public void testConcurrentRequests() throws InterruptedException { - int timeoutSecond = 5; - int concurThread = 10; - int exceptionCount = 0; - List> tasks = new ArrayList>(); - List> pendingTasks = new ArrayList>(); - final ExecutorService callbackExecutor = Executors.newFixedThreadPool(concurThread, - new ThreadFactoryBuilder().setDaemon(false).setNameFormat("CallbackExecutor").build()); - ListeningExecutorService taskExecutorService = - MoreExecutors.listeningDecorator(callbackExecutor); - while(concurThread > 0){ - ListenableFuture runningTaskFuture = - taskExecutorService.submit(new EnvironmentRequest()); - pendingTasks.add(runningTaskFuture); - concurThread--; - } - - //waiting for all threads submitted to thread pool - for (ListenableFuture future : pendingTasks) { - try { - future.get(); - } catch (ExecutionException e) { - exceptionCount++; - } - } - - //stop accepting new threads and shutdown threadpool - taskExecutorService.shutdown(); - try { - if(!taskExecutorService.awaitTermination(timeoutSecond, TimeUnit.SECONDS)) { - taskExecutorService.shutdownNow(); - } - } catch (InterruptedException ie) { - taskExecutorService.shutdownNow(); - } - - assertEquals(0, exceptionCount); - } - - private class EnvironmentRequest implements Callable { - - @Override - public Object call() throws Exception { - EnvironmentUpdateUtils.put("test.environment.concurrent" - +Thread.currentThread().getId(), "test.evironment.concurrent"); - return null; - } - } - } diff --git a/tez-common/src/test/java/org/apache/tez/common/TestTezUtils.java b/tez-common/src/test/java/org/apache/tez/common/TestTezUtils.java index 61bb9a7c11..d599cafd76 100644 --- a/tez-common/src/test/java/org/apache/tez/common/TestTezUtils.java +++ b/tez-common/src/test/java/org/apache/tez/common/TestTezUtils.java @@ -20,14 +20,19 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertEquals; import java.io.IOException; import java.util.BitSet; +import java.util.HashMap; +import java.util.Map; import java.util.Random; import org.apache.hadoop.conf.Configuration; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.UserPayload; +import org.apache.tez.dag.api.records.DAGProtos; import org.codehaus.jettison.json.JSONException; import 
org.codehaus.jettison.json.JSONObject; import org.junit.Assert; @@ -49,6 +54,55 @@ public void testByteStringToAndFromConf() throws IOException { checkConf(conf); } + private String constructLargeValue() { + int largeSizeMinimum = 64 * 1024 * 1024; + final String alphaString = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int largeSize = (largeSizeMinimum + alphaString.length() - 1) / alphaString.length(); + largeSize *= alphaString.length(); + assertTrue(largeSize >= alphaString.length()); + StringBuilder sb = new StringBuilder(largeSize); + + while (sb.length() < largeSize) { + sb.append(alphaString); + } + + String largeValue = sb.toString(); + Assert.assertEquals(largeSize, largeValue.length()); + return largeValue; + } + + private ByteString createByteString(Configuration conf, String largeValue) throws IOException { + conf.set("testLargeValue", largeValue); + Assert.assertEquals(conf.size(), 7); + return TezUtils.createByteStringFromConf(conf); + } + + @Test (timeout=20000) + public void testByteStringToAndFromLargeConf() throws IOException { + Configuration conf = getConf(); + String largeValue = constructLargeValue(); + ByteString bsConf = createByteString(conf, largeValue); + conf.clear(); + Assert.assertEquals(conf.size(), 0); + conf = TezUtils.createConfFromByteString(bsConf); + Assert.assertEquals(conf.size(), 7); + checkConf(conf); + Assert.assertEquals(conf.get("testLargeValue"), largeValue); + } + + @Test (timeout=20000) + public void testByteStringAddToLargeConf() throws IOException { + Configuration conf = getConf(); + String largeValue = constructLargeValue(); + ByteString bsConf = createByteString(conf, largeValue); + conf.clear(); + Assert.assertEquals(conf.size(), 0); + TezUtils.addToConfFromByteString(conf, bsConf); + Assert.assertEquals(conf.size(), 7); + checkConf(conf); + Assert.assertEquals(conf.get("testLargeValue"), largeValue); + } + @Test (timeout=2000) public void testPayloadToAndFromConf() throws IOException { Configuration conf = getConf(); @@ -76,11 +130,11 @@ public void testCleanVertexName() { public void testBitSetToByteArray() { BitSet bitSet = createBitSet(0); byte[] bytes = TezUtilsInternal.toByteArray(bitSet); - Assert.assertTrue(bytes.length == ((bitSet.length() / 8) + 1)); + Assert.assertEquals(bytes.length, (bitSet.length() + 7) / 8); bitSet = createBitSet(1000); bytes = TezUtilsInternal.toByteArray(bitSet); - Assert.assertTrue(bytes.length == ((bitSet.length() / 8) + 1)); + Assert.assertEquals(bytes.length, (bitSet.length() + 7) / 8); } @Test (timeout=2000) @@ -230,4 +284,14 @@ public void testConvertToHistoryTextWithReplaceVars() throws JSONException { } + @Test(timeout = 5000) + public void testPopulateConfProtoFromEntries() { + Map map = new HashMap<>(); + map.put("nonNullKey", "value"); + map.put("nullKey", null); + DAGProtos.ConfigurationProto.Builder confBuilder = DAGProtos.ConfigurationProto.newBuilder(); + TezUtils.populateConfProtoFromEntries(map.entrySet(), confBuilder); + assertEquals(confBuilder.getConfKeyValuesList().size(), 1); + } + } diff --git a/tez-common/src/test/java/org/apache/tez/dag/records/TestTezIds.java b/tez-common/src/test/java/org/apache/tez/dag/records/TestTezIds.java index 5e1552d345..10b62b2bd1 100644 --- a/tez-common/src/test/java/org/apache/tez/dag/records/TestTezIds.java +++ b/tez-common/src/test/java/org/apache/tez/dag/records/TestTezIds.java @@ -38,7 +38,7 @@ private void verifyDagInfo(String[] splits, TezDAGID dagId) { } private void verifyVertexInfo(String[] splits, TezVertexID vId) { - verifyDagInfo(splits, 
vId.getDAGId()); + verifyDagInfo(splits, vId.getDAGID()); Assert.assertEquals(vId.getId(), Integer.valueOf(splits[4]).intValue()); } diff --git a/tez-common/src/test/resources/META-INF/LICENSE.txt b/tez-common/src/test/resources/META-INF/LICENSE similarity index 100% rename from tez-common/src/test/resources/META-INF/LICENSE.txt rename to tez-common/src/test/resources/META-INF/LICENSE diff --git a/tez-common/src/test/resources/META-INF/NOTICE b/tez-common/src/test/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-common/src/test/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-common/src/test/resources/META-INF/NOTICE.txt b/tez-common/src/test/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-common/src/test/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-dag/findbugs-exclude.xml b/tez-dag/findbugs-exclude.xml index c3e099e5b3..e8755832f4 100644 --- a/tez-dag/findbugs-exclude.xml +++ b/tez-dag/findbugs-exclude.xml @@ -92,6 +92,11 @@ + + + + + @@ -149,24 +154,8 @@ - - - - - - - - - - - - - - - - - + @@ -247,15 +236,16 @@ - + - - - - - - - + + + + + + + + diff --git a/tez-dag/pom.xml b/tez-dag/pom.xml index 76ecfe7311..779f7ed82f 100644 --- a/tez-dag/pom.xml +++ b/tez-dag/pom.xml @@ -19,7 +19,7 @@ org.apache.tez tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-dag @@ -31,6 +31,7 @@ org.apache.tez.dag.app.rm.container.AMContainerImpl Tez Tez.gv + ${project.build.directory}/logs tez-dag @@ -87,8 +88,8 @@ hadoop-yarn-server-web-proxy - org.apache.commons - commons-math3 + org.roaringbitmap + RoaringBitmap org.slf4j @@ -126,7 +127,7 @@ org.mockito - mockito-all + mockito-core test @@ -142,19 +143,15 @@ jettison - org.mortbay.jetty - jetty - compile - - - org.mortbay.jetty - jetty-util + javax.servlet + javax.servlet-api compile - javax.servlet - servlet-api - compile + org.apache.hadoop + hadoop-common + test-jar + test @@ -187,30 +184,57 @@ apache-rat-plugin - org.apache.hadoop - hadoop-maven-plugins + maven-antrun-plugin + + + generate-sources + generate-sources + + + + + + + + run + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ${test.log.dir} + + + + + com.github.os72 + protoc-jar-maven-plugin - compile-protoc generate-sources - protoc + run - ${protobuf.version} + com.google.protobuf:protoc:${protobuf.version} ${protoc.path} - - ${basedir}/src/main/proto - ${basedir}/../tez-api/src/main/proto - ${basedir}/../tez-runtime-internals/src/main/proto - - - ${basedir}/src/main/proto - - HistoryEvents.proto - - - ${project.build.directory}/generated-sources/java + none + + ${basedir}/../tez-api/src/main/proto + ${basedir}/../tez-runtime-internals/src/main/proto + + + ${basedir}/src/main/proto + + + + ${project.build.directory}/generated-sources/java + + diff --git a/tez-dag/src/main/java/org/apache/tez/Utils.java b/tez-dag/src/main/java/org/apache/tez/Utils.java index 6f03a673de..1f9fb6a652 100644 --- a/tez-dag/src/main/java/org/apache/tez/Utils.java +++ b/tez-dag/src/main/java/org/apache/tez/Utils.java @@ -14,26 +14,43 @@ package org.apache.tez; +import javax.annotation.Nullable; import 
org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.yarn.event.Event; +import org.apache.tez.dag.api.records.DAGProtos; import org.apache.tez.dag.app.AppContext; import org.apache.tez.dag.app.dag.DAG; +import org.apache.tez.dag.app.dag.DAGScheduler; import org.apache.tez.dag.app.dag.DAGTerminationCause; +import org.apache.tez.dag.app.dag.Vertex; import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag; import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.dag.utils.Graph; import org.apache.tez.serviceplugins.api.DagInfo; import org.apache.tez.serviceplugins.api.ServicePluginError; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; +import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + @InterfaceAudience.Private -/** +/* * Utility class within the tez-dag module */ -public class Utils { +public final class Utils { private static final Logger LOG = LoggerFactory.getLogger(Utils.class); + /** + * Pattern to clean the labels in the .dot generation. + */ + private static Pattern sanitizeLabelPattern = Pattern.compile("[:\\-\\W]+"); + + private Utils() {} + public static String getContainerLauncherIdentifierString(int launcherIndex, AppContext appContext) { String name; try { @@ -92,6 +109,146 @@ public static void processNonFatalServiceErrorReport(String entityString, } } + /** + * Generate a visualization file. + * @param dag DAG. + * @param dagPB DAG plan. + * @param logDirs directories where the file will be written. + * @param scheduler scheduler that will provide the priorities + * of the vertexes. + */ + public static void generateDAGVizFile(final DAG dag, + final DAGProtos.DAGPlan dagPB, + final String[] logDirs, final @Nullable DAGScheduler scheduler) { + TezDAGID dagId = dag.getID(); + + HashMap nameToVertex = null; + if (scheduler != null) { + nameToVertex = new HashMap<>(dag.getVertices().size()); + for (Vertex v: dag.getVertices().values()) { + nameToVertex.put(v.getName(), v); + } + } + + Graph graph = new Graph(sanitizeLabelForViz(dagPB.getName())); + for (DAGProtos.VertexPlan vertexPlan : dagPB.getVertexList()) { + StringBuilder nodeLabel = new StringBuilder( + sanitizeLabelForViz(vertexPlan.getName()) + + "[" + getShortClassName( + vertexPlan.getProcessorDescriptor().getClassName())); + + if (scheduler != null) { + Vertex vertex = nameToVertex.get(vertexPlan.getName()); + if (vertex != null) { + try { + int priority = (scheduler.getPriorityLowLimit(dag, vertex) + + scheduler.getPriorityHighLimit(dag,vertex)) / 2; + nodeLabel.append(", priority=").append(priority).append("]"); + } catch (UnsupportedOperationException e) { + LOG.info("The DAG graphviz file with priorities will not" + + " be generated since the scheduler " + + scheduler.getClass().getSimpleName() + " doesn't" + + " override the methods to get the priorities"); + return; + } + } + } + Graph.Node n = graph.newNode(sanitizeLabelForViz(vertexPlan.getName()), + nodeLabel.toString()); + for (DAGProtos.RootInputLeafOutputProto input + : vertexPlan.getInputsList()) { + Graph.Node inputNode = graph.getNode( + sanitizeLabelForViz(vertexPlan.getName()) + + "_" + sanitizeLabelForViz(input.getName())); + inputNode.setLabel(sanitizeLabelForViz(vertexPlan.getName()) + + "[" + sanitizeLabelForViz(input.getName()) + "]"); + inputNode.setShape("box"); + inputNode.addEdge(n, "Input" + + " [inputClass=" + getShortClassName( + input.getIODescriptor().getClassName()) + + ", initializer=" + getShortClassName( input.getControllerDescriptor().getClassName()) + "]"); + } + for (DAGProtos.RootInputLeafOutputProto output + : vertexPlan.getOutputsList()) { + Graph.Node outputNode = graph.getNode(sanitizeLabelForViz( + vertexPlan.getName()) + + "_" + sanitizeLabelForViz(output.getName())); + outputNode.setLabel(sanitizeLabelForViz(vertexPlan.getName()) + + "[" + sanitizeLabelForViz(output.getName()) + "]"); + outputNode.setShape("box"); + n.addEdge(outputNode, "Output" + + " [outputClass=" + getShortClassName( + output.getIODescriptor().getClassName()) + + ", committer=" + getShortClassName( + output.getControllerDescriptor().getClassName()) + "]"); + } + } + + for (DAGProtos.EdgePlan e : dagPB.getEdgeList()) { + + Graph.Node n = graph.getNode(sanitizeLabelForViz( + e.getInputVertexName())); + n.addEdge(graph.getNode(sanitizeLabelForViz( + e.getOutputVertexName())), + "[" + + "input=" + getShortClassName(e.getEdgeSource().getClassName()) + + ", output=" + getShortClassName( + e.getEdgeDestination().getClassName()) + + ", dataMovement=" + e.getDataMovementType().name().trim() + + ", schedulingType=" + + e.getSchedulingType().name().trim() + "]"); + } + + String outputFile = ""; + if (logDirs != null && logDirs.length != 0) { + outputFile += logDirs[0]; + outputFile += File.separator; + } else { + LOG.warn("DAGVizFile will be created under current (.) directory: {}," + " which is neither expected nor recommended", new File(".").getAbsolutePath()); + } + outputFile += dagId.toString(); + // Means we have set the priorities + if (scheduler != null) { + outputFile += "_priority"; + } + outputFile += ".dot"; + + try { + LOG.info("Generating DAG graphviz file" + + ", dagId=" + dagId.toString() + + ", filePath=" + outputFile); + graph.save(outputFile); + } catch (Exception e) { + LOG.warn("Error occurred when trying to save graph structure" + + " for dag " + dagId.toString(), e); + } + } + + /** + * Get the short name of the class. + * @param className long name + * @return short name + */ + private static String getShortClassName(final String className) { + int pos = className.lastIndexOf("."); + if (pos != -1 && pos < className.length() - 1) { + return className.substring(pos + 1); + } + return className; + } + + /** + * Replace some characters with underscores. 
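The sanitizer collapses every run of characters matching [:\-\W]+ (that is, ':', '-', and any non-word character) into a single underscore, which keeps vertex names safe for .dot syntax. For example (SanitizeDemo is a hypothetical class, not part of this patch):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class SanitizeDemo {
      public static void main(String[] args) {
        Pattern p = Pattern.compile("[:\\-\\W]+");
        Matcher m = p.matcher("Map 1: initial-phase");
        System.out.println(m.replaceAll("_")); // Map_1_initial_phase
      }
    }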
+ * @param label label to sanitize + * @return the label with the replaced characters + */ + private static String sanitizeLabelForViz(final String label) { + Matcher m = sanitizeLabelPattern.matcher(label); + return m.replaceAll("_"); + } + @SuppressWarnings("unchecked") private static void sendEvent(AppContext appContext, Event event) { appContext.getEventHandler().handle(event); diff --git a/tez-dag/src/main/java/org/apache/tez/client/LocalClient.java b/tez-dag/src/main/java/org/apache/tez/client/LocalClient.java index 6baea482e9..73f60bdcc2 100644 --- a/tez-dag/src/main/java/org/apache/tez/client/LocalClient.java +++ b/tez-dag/src/main/java/org/apache/tez/client/LocalClient.java @@ -18,16 +18,18 @@ package org.apache.tez.client; -import java.io.File; import java.io.IOException; import java.net.InetAddress; import java.nio.ByteBuffer; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.BiFunction; -import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.client.api.YarnClientApplication; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; @@ -45,21 +47,32 @@ import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.util.SystemClock; +import org.apache.tez.common.AsyncDispatcher; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezUtilsInternal; +import org.apache.tez.dag.api.DAGSubmissionTimedOut; +import org.apache.tez.dag.api.DagTypeConverters; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezException; +import org.apache.tez.dag.api.client.DAGClient; import org.apache.tez.dag.api.client.DAGClientHandler; -import org.apache.tez.dag.api.records.DAGProtos; +import org.apache.tez.dag.api.client.DAGClientImpl; +import org.apache.tez.dag.api.client.DAGClientImplLocal; +import org.apache.tez.dag.api.client.DAGStatus; +import org.apache.tez.dag.api.client.StatusGetOpts; +import org.apache.tez.dag.api.client.VertexStatus; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.SubmitDAGRequestProto; import org.apache.tez.dag.api.records.DAGProtos.AMPluginDescriptorProto; import org.apache.tez.dag.app.AppContext; import org.apache.tez.dag.app.DAGAppMaster; import org.apache.tez.dag.app.DAGAppMasterState; +import org.apache.tez.dag.app.LocalDAGAppMaster; import org.apache.tez.dag.app.dag.DAG; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; +import com.google.protobuf.ServiceException; public class LocalClient extends FrameworkClient { public static final Logger LOG = LoggerFactory.getLogger(LocalClient.class); @@ -74,6 +87,10 @@ public class LocalClient extends FrameworkClient { private boolean isSession; private TezApiVersionInfo versionInfo = new TezApiVersionInfo(); private volatile Throwable amFailException = null; + private boolean isLocalWithoutNetwork; + private String amHost; + private int amPort; + private static final String localModeDAGSchedulerClassName = "org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled"; @@ -81,9 +98,8 @@ public LocalClient() { } @Override - 
public void init(TezConfiguration tezConf, YarnConfiguration yarnConf) {
+  public void init(TezConfiguration tezConf) {
     this.conf = tezConf;
-    tezConf.set("fs.defaultFS", "file:///");
     // Tez libs already in the client's classpath
     this.conf.setBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, true);
     this.conf.set(TezConfiguration.TEZ_AM_DAG_SCHEDULER_CLASS, localModeDAGSchedulerClassName);
@@ -92,6 +108,10 @@ public void init(TezConfiguration tezConf, YarnConfiguration yarnConf) {
 
     // disable web service for local mode.
     this.conf.setBoolean(TezConfiguration.TEZ_AM_WEBSERVICE_ENABLE, false);
+
+    this.isLocalWithoutNetwork =
+        tezConf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE_WITHOUT_NETWORK,
+            TezConfiguration.TEZ_LOCAL_MODE_WITHOUT_NETWORK_DEFAULT);
   }
@@ -140,7 +160,9 @@ public ApplicationId submitApplication(ApplicationSubmissionContext appContext)
   @Override
   public void killApplication(ApplicationId appId) {
     try {
-      clientHandler.shutdownAM();
+      if (clientHandler != null) {
+        clientHandler.shutdownAM();
+      }
     } catch (TezException e) {
       throw new RuntimeException(e);
     }
@@ -173,7 +195,6 @@ public ApplicationReport getApplicationReport(ApplicationId appId) {
       report.setYarnApplicationState(convertDAGAppMasterState(dagAppMaster.getState()));
       report.setFinalApplicationStatus(convertDAGAppMasterStateToFinalYARNState(dagAppMaster.getState()));
-
       List<String> diagnostics = dagAppMaster.getDiagnostics();
       if (diagnostics != null) {
         report.setDiagnostics(diagnostics.toString());
@@ -185,6 +206,9 @@
       report.setProgress(dagAppMaster.getProgress());
       report.setAMRMToken(null);
+      this.amHost = dagAppMaster.getAppNMHost();
+      this.amPort = dagAppMaster.getRpcPort();
+
       return report;
     }
@@ -247,7 +271,7 @@ protected void startDAGAppMaster(final ApplicationSubmissionContext appContext)
       if (dagAMState.equals(DAGAppMasterState.NEW)) {
         LOG.info("DAGAppMaster is not started wait for 100ms...");
       } else if (dagAMState.equals(DAGAppMasterState.INITED)) {
-        LOG.info("DAGAppMaster is not startetd wait for 100ms...");
+        LOG.info("DAGAppMaster is not started wait for 100ms...");
       } else if (dagAMState.equals(DAGAppMasterState.ERROR)) {
         throw new TezException("DAGAppMaster got an error during initialization");
       } else if (dagAMState.equals(DAGAppMasterState.KILLED)) {
@@ -286,19 +310,43 @@ public void run() {
       try {
         ApplicationId appId = appContext.getApplicationId();
-        // Set up working directory for DAGAppMaster
+        // Set up working directory for DAGAppMaster.
+        // The staging directory may be on the default file system, which may or may not
+        // be the local FS. For example, when testing Hive against a pseudo-distributed
+        // cluster, it's useful for the default FS to be HDFS. Hive then puts its scratch
+        // directories on HDFS, and sets the Tez staging directory to be the session's
+        // scratch directory.
+        //
+        // To handle this case, we need to copy over the staging data back onto the
+        // local file system, where the rest of the Tez Child code expects it.
+        //
+        // NOTE: we base the local working directory path off of the staging path, even
+        // though it might be on a different file system. Typically they're both in a
+        // path starting with /tmp, but in the future we may want to use a different
+        // temp directory locally.
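+        // (Illustrative example, an assumption added for clarity rather than
+        // part of the original comment: with a staging dir of
+        // hdfs://nn:8020/tmp/tez/staging/application_1234_0001, the derived
+        // local working dir becomes file:/tmp/tez/staging/application_1234_0001_wd,
+        // i.e. the URI path is reused verbatim on the local FS with an "_wd" suffix.)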
        Path staging = TezCommonUtils.getTezSystemStagingPath(conf, appId.toString());
-        Path userDir = TezCommonUtils.getTezSystemStagingPath(conf, appId.toString()+"_wd");
+        FileSystem stagingFs = staging.getFileSystem(conf);
+
+        FileSystem localFs = FileSystem.getLocal(conf);
+        Path userDir = localFs.makeQualified(new Path(staging.toUri().getPath() + "_wd"));
         LOG.info("Using working directory: " + userDir.toUri().getPath());
-        FileSystem fs = FileSystem.get(conf);
         // copy data from staging directory to working directory to simulate the resource localizing
-        FileUtil.copy(fs, staging, fs, userDir, false, conf);
+        FileUtil.copy(stagingFs, staging, localFs, userDir, false, conf);
         // Prepare Environment
         Path logDir = new Path(userDir, "localmode-log-dir");
         Path localDir = new Path(userDir, "localmode-local-dir");
-        fs.mkdirs(logDir);
-        fs.mkdirs(localDir);
+
+        // Fail fast if the local directories (mirroring the paths that were used on HDFS)
+        // cannot be created. In that case, the user might want to choose a different
+        // staging path, one that also works on the local FS.
+        if (!localFs.mkdirs(logDir)) {
+          throw new IOException(
+              "Unable to create log directory, try to create it manually for further insights: " + logDir);
+        }
+        if (!localFs.mkdirs(localDir)) {
+          throw new IOException(
+              "Unable to create local directory, try to create it manually for further insights: " + localDir);
+        }
         UserGroupInformation.setConfiguration(conf);
         // Add session specific credentials to the AM credentials.
@@ -321,12 +369,12 @@ public void run() {
         dagAppMaster =
             createDAGAppMaster(applicationAttemptId, cId, currentHost, nmPort, nmHttpPort,
-                new SystemClock(), appSubmitTime, isSession, userDir.toUri().getPath(),
+                SystemClock.getInstance(), appSubmitTime, isSession, userDir.toUri().getPath(),
                 new String[] {localDir.toUri().getPath()},
                 new String[] {logDir.toUri().getPath()}, amCredentials,
                 UserGroupInformation.getCurrentUser().getShortUserName());
         DAGAppMaster.initAndStartAppMaster(dagAppMaster, conf);
         clientHandler = new DAGClientHandler(dagAppMaster);
-
+        ((AsyncDispatcher)dagAppMaster.getDispatcher()).setDrainEventsOnStop();
       } catch (Throwable t) {
         LOG.error("Error starting DAGAppMaster", t);
         if (dagAppMaster != null) {
@@ -357,30 +405,89 @@ protected DAGAppMaster createDAGAppMaster(ApplicationAttemptId applicationAttemp
     // Read in additional information about external services
     AMPluginDescriptorProto amPluginDescriptorProto =
-        getPluginDescriptorInfo(conf, applicationAttemptId.getApplicationId().toString());
+        TezUtilsInternal.readUserSpecifiedTezConfiguration(userDir)
+            .getAmPluginDescriptor();
+
+    return isLocalWithoutNetwork
+        ?
new LocalDAGAppMaster(applicationAttemptId, cId, currentHost, nmPort, nmHttpPort, + SystemClock.getInstance(), appSubmitTime, isSession, userDir, localDirs, logDirs, + versionInfo.getVersion(), credentials, jobUserName, amPluginDescriptorProto) + : new DAGAppMaster(applicationAttemptId, cId, currentHost, nmPort, nmHttpPort, + SystemClock.getInstance(), appSubmitTime, isSession, userDir, localDirs, logDirs, + versionInfo.getVersion(), credentials, jobUserName, amPluginDescriptorProto); + } + + @Override + public TezAppMasterStatus getAMStatus(Configuration configuration, ApplicationId appId, + UserGroupInformation ugi) throws TezException, ServiceException, IOException { + if (isLocalWithoutNetwork) { + if (clientHandler == null) { + return TezAppMasterStatus.INITIALIZING; + } + return clientHandler.getTezAppMasterStatus(); + } + return super.getAMStatus(configuration, appId, ugi); + } + @Override + public DAGClient submitDag(org.apache.tez.dag.api.DAG dag, SubmitDAGRequestProto request, + String clientName, ApplicationId sessionAppId, long clientTimeout, UserGroupInformation ugi, + TezConfiguration tezConf) throws IOException, TezException, DAGSubmissionTimedOut { + + Map additionalResources = null; + if (request.hasAdditionalAmResources()) { + additionalResources = + DagTypeConverters.convertFromPlanLocalResources(request.getAdditionalAmResources()); + } - return new DAGAppMaster(applicationAttemptId, cId, currentHost, nmPort, nmHttpPort, - new SystemClock(), appSubmitTime, isSession, userDir, localDirs, logDirs, - versionInfo.getVersion(), credentials, jobUserName, amPluginDescriptorProto); + String dagId = dagAppMaster.submitDAGToAppMaster(request.getDAGPlan(), additionalResources); + return getDAGClient(sessionAppId, dagId, tezConf, ugi); } - private AMPluginDescriptorProto getPluginDescriptorInfo(Configuration conf, - String applicationIdString) throws - IOException { - Path tezSysStagingPath = TezCommonUtils - .getTezSystemStagingPath(conf, applicationIdString); - // Remove the filesystem qualifier. - String unqualifiedPath = tezSysStagingPath.toUri().getPath(); - - DAGProtos.ConfigurationProto confProto = - TezUtilsInternal - .readUserSpecifiedTezConfiguration(unqualifiedPath); - AMPluginDescriptorProto amPluginDescriptorProto = null; - if (confProto.hasAmPluginDescriptor()) { - amPluginDescriptorProto = confProto.getAmPluginDescriptor(); + @Override + public DAGClient getDAGClient(ApplicationId appId, String dagId, TezConfiguration tezConf, + UserGroupInformation ugi) { + return isLocalWithoutNetwork + ? 
new DAGClientImplLocal(appId, dagId, tezConf, this, ugi,
+        new BiFunction<Set<StatusGetOpts>, Long, DAGStatus>() {
+          @Override
+          public DAGStatus apply(Set<StatusGetOpts> statusOpts, Long timeout) {
+            try {
+              return clientHandler.getDAGStatus(dagId, statusOpts, timeout);
+            } catch (TezException e) {
+              throw new RuntimeException(e);
+            }
+          }
+        }, new BiFunction<Set<StatusGetOpts>, String, VertexStatus>() {
+          @Override
+          public VertexStatus apply(Set<StatusGetOpts> statusOpts, String vertexName) {
+            try {
+              return clientHandler.getVertexStatus(dagId, vertexName, statusOpts);
+            } catch (TezException e) {
+              throw new RuntimeException(e);
+            }
+          }
+        }) : new DAGClientImpl(appId, dagId, tezConf, this, ugi);
+  }
+
+  @Override
+  public boolean shutdownSession(Configuration configuration, ApplicationId sessionAppId,
+      UserGroupInformation ugi) throws TezException, IOException, ServiceException {
+    if (isLocalWithoutNetwork) {
+      if (clientHandler != null) {
+        clientHandler.shutdownAM();
+      }
+      return true;
     }
-    return amPluginDescriptorProto;
+    return super.shutdownSession(configuration, sessionAppId, ugi);
+  }
+
+  @Override
+  public String getAmHost() {
+    return amHost;
   }
+
+  @Override
+  public int getAmPort() {
+    return amPort;
+  }
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientHandler.java
index 618676d978..4ed9d86a34 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientHandler.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientHandler.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.tez.client.TezAppMasterStatus;
 import org.apache.tez.dag.api.DAGNotRunningException;
+import org.apache.tez.dag.api.NoCurrentDAGException;
 import org.apache.tez.dag.api.TezException;
 import org.apache.tez.dag.api.records.DAGProtos.DAGPlan;
 import org.apache.tez.dag.app.DAGAppMaster;
@@ -94,15 +95,13 @@ DAG getDAG(String dagIdStr) throws TezException {
     DAG currentDAG = getCurrentDAG();
     if (currentDAG == null) {
-      throw new TezException("No running dag at present");
+      throw new NoCurrentDAGException(dagIdStr);
     }
     final String currentDAGIdStr = currentDAG.getID().toString();
     if (!currentDAGIdStr.equals(dagIdStr)) {
       if (getAllDagIDs().contains(dagIdStr)) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Looking for finished dagId " + dagIdStr + " current dag is " + currentDAGIdStr);
-        }
+        LOG.debug("Looking for finished dagId {} current dag is {}", dagIdStr, currentDAGIdStr);
         throw new DAGNotRunningException("DAG " + dagIdStr + " Not running, current dag is "
             + currentDAGIdStr);
       } else {
@@ -188,4 +187,7 @@ public long getLastHeartbeatTime() {
     return lastHeartbeatTime.get();
   }
 
+  public String getWebUIAddress() {
+    return dagAppMaster.getWebUIAddress();
+  }
 }
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientServer.java b/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientServer.java
index 14de870744..204024489f 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientServer.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientServer.java
@@ -128,7 +128,7 @@ private Server createServer(Class<?> pbProtocol, InetSocketAddress addr, Configuration conf,
       BlockingService blockingService, String portRangeConfig) throws IOException {
     RPC.setProtocolEngine(conf, pbProtocol, ProtobufRpcEngine.class);
     RPC.Server server = new RPC.Builder(conf).setProtocol(pbProtocol)
-        .setInstance(blockingService).setBindAddress(addr.getHostName())
+
.setInstance(blockingService).setBindAddress(addr.getHostString()) .setPort(addr.getPort()).setNumHandlers(numHandlers).setVerbose(false) .setPortRangeConfig(portRangeConfig).setSecretManager(secretManager) .build(); diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGStatusBuilder.java b/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGStatusBuilder.java index 0002d8b604..931c6d05ca 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGStatusBuilder.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGStatusBuilder.java @@ -61,6 +61,13 @@ public void addVertexProgress(String name, ProgressBuilder progress) { getBuilder().addVertexProgress(builder.build()); } + //TODO: let this be a map of values in protobuf 3.x + public void setMemoryUsage(long memoryUsedByAM, long memoryUsedByTasks) { + Builder builder = getBuilder(); + builder.setMemoryUsedByAM(memoryUsedByAM); + builder.setMemoryUsedByTasks(memoryUsedByTasks); + } + public DAGStatusProto getProto() { return getBuilder().build(); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/client/ProgressBuilder.java b/tez-dag/src/main/java/org/apache/tez/dag/api/client/ProgressBuilder.java index 538151899b..9dc13549cb 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/api/client/ProgressBuilder.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/api/client/ProgressBuilder.java @@ -59,6 +59,10 @@ public void setKilledTaskAttemptCount(int count) { getBuilder().setKilledTaskAttemptCount(count); } + public void setRejectedTaskAttemptCount(int count) { + getBuilder().setRejectedTaskAttemptCount(count); + } + private ProgressProto.Builder getBuilder() { return (Builder) this.proxy; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/client/VertexStatusBuilder.java b/tez-dag/src/main/java/org/apache/tez/dag/api/client/VertexStatusBuilder.java index 4de321cf2c..0304fc9d6f 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/api/client/VertexStatusBuilder.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/api/client/VertexStatusBuilder.java @@ -28,6 +28,7 @@ import org.apache.tez.dag.api.records.DAGProtos.VertexStatusStateProto; import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.app.dag.VertexState; +import org.apache.tez.dag.records.TezVertexID; public class VertexStatusBuilder extends VertexStatus { @@ -35,6 +36,10 @@ public VertexStatusBuilder() { super(VertexStatusProto.newBuilder()); } + public void setId(TezVertexID vertexId) { + getBuilder().setId(vertexId.toString()); + } + public void setState(VertexState state) { getBuilder().setState(getProtoState(state)); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/client/rpc/DAGClientAMProtocolBlockingPBServerImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/api/client/rpc/DAGClientAMProtocolBlockingPBServerImpl.java index 72cf0d5642..5c24a27908 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/api/client/rpc/DAGClientAMProtocolBlockingPBServerImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/api/client/rpc/DAGClientAMProtocolBlockingPBServerImpl.java @@ -45,6 +45,8 @@ import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetDAGStatusResponseProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetVertexStatusRequestProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetVertexStatusResponseProto; +import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetWebUIAddressRequestProto; +import 
org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.GetWebUIAddressResponseProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.ShutdownSessionRequestProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.ShutdownSessionResponseProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.SubmitDAGRequestProto; @@ -166,7 +168,8 @@ public SubmitDAGResponseProto submitDAG(RpcController controller, if (request.hasSerializedRequestPath()) { // need to deserialize large request from hdfs Path requestPath = new Path(request.getSerializedRequestPath()); - try (FSDataInputStream fsDataInputStream = stagingFs.open(requestPath)) { + FileSystem fs = requestPath.getFileSystem(stagingFs.getConf()); + try (FSDataInputStream fsDataInputStream = fs.open(requestPath)) { CodedInputStream in = CodedInputStream.newInstance(fsDataInputStream); in.setSizeLimit(Integer.MAX_VALUE); @@ -183,7 +186,7 @@ public SubmitDAGResponseProto submitDAG(RpcController controller, } String dagId = real.submitDAG(dagPlan, additionalResources); return SubmitDAGResponseProto.newBuilder().setDagId(dagId).build(); - } catch(TezException e) { + } catch(IOException | TezException e) { throw wrapException(e); } } @@ -226,4 +229,10 @@ public GetAMStatusResponseProto getAMStatus(RpcController controller, } } + @Override + public GetWebUIAddressResponseProto getWebUIAddress(RpcController controller, GetWebUIAddressRequestProto request) + throws ServiceException { + String address = real.getWebUIAddress(); + return GetWebUIAddressResponseProto.newBuilder().setWebUiAddress(address).build(); + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java index b3d561aa31..c9a7083c1d 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java @@ -20,12 +20,12 @@ import java.util.Map; import java.util.Set; +import java.util.concurrent.ExecutorService; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.Credentials; -import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -73,7 +73,7 @@ public interface AppContext { String getUser(); DAG getCurrentDAG(); - + ListeningExecutorService getExecService(); void setDAG(DAG dag); diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerContext.java b/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerContext.java index f00b27b243..f2c7d5f700 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerContext.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerContext.java @@ -21,6 +21,7 @@ import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; +import java.util.Objects; import javax.annotation.Nullable; @@ -33,8 +34,6 @@ import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.tez.dag.app.dag.Vertex; -import com.google.common.base.Preconditions; - public class ContainerContext { private static final Logger LOG = LoggerFactory.getLogger(ContainerContext.class); @@ -49,11 +48,11 @@ public class ContainerContext { public ContainerContext(Map localResources, Credentials credentials, Map 
environment, String javaOpts) { - Preconditions.checkNotNull(localResources, + Objects.requireNonNull(localResources, "localResources should not be null"); - Preconditions.checkNotNull(credentials, "credentials should not be null"); - Preconditions.checkNotNull(environment, "environment should not be null"); - Preconditions.checkNotNull(javaOpts, "javaOpts should not be null"); + Objects.requireNonNull(credentials, "credentials should not be null"); + Objects.requireNonNull(environment, "environment should not be null"); + Objects.requireNonNull(javaOpts, "javaOpts should not be null"); this.localResources = localResources; this.credentials = credentials; this.environment = environment; @@ -64,11 +63,11 @@ public ContainerContext(Map localResources, public ContainerContext(Map localResources, Credentials credentials, Map environment, String javaOpts, @Nullable Vertex vertex) { - Preconditions.checkNotNull(localResources, + Objects.requireNonNull(localResources, "localResources should not be null"); - Preconditions.checkNotNull(credentials, "credentials should not be null"); - Preconditions.checkNotNull(environment, "environment should not be null"); - Preconditions.checkNotNull(javaOpts, "javaOpts should not be null"); + Objects.requireNonNull(credentials, "credentials should not be null"); + Objects.requireNonNull(environment, "environment should not be null"); + Objects.requireNonNull(javaOpts, "javaOpts should not be null"); this.localResources = localResources; this.credentials = credentials; this.environment = environment; @@ -97,7 +96,7 @@ public String getJavaOpts() { * container context. */ public boolean isSuperSet(ContainerContext otherContext) { - Preconditions.checkNotNull(otherContext, "otherContext should not null"); + Objects.requireNonNull(otherContext, "otherContext should not null"); // Assumptions: // Credentials are the same for all containers belonging to a DAG. // Matching can be added if containers are used across DAGs @@ -129,26 +128,24 @@ public boolean isExactMatch(ContainerContext otherContext) { // classpath modification private static boolean localResourcesCompatible(Map srcLRs, Map reqLRs) { - Map reqLRsCopy = new HashMap(reqLRs); - for (Entry srcLREntry : srcLRs.entrySet()) { - LocalResource requestedLocalResource = reqLRsCopy.remove(srcLREntry.getKey()); - if (requestedLocalResource != null && !srcLREntry.getValue().equals(requestedLocalResource)) { + for (Entry reqLREntry : reqLRs.entrySet()) { + LocalResource requestedLocalResource = srcLRs.get(reqLREntry.getKey()); + if (requestedLocalResource == null) { + LocalResource lr = reqLREntry.getValue(); + if (!LocalResourceType.FILE.equals(lr.getType())) { + if (LOG.isDebugEnabled()) { + LOG.debug("Cannot match container: Additional local resource needed is not of type FILE" + + ", resourceName: " + reqLREntry.getKey() + + ", resourceDetails: " + reqLREntry); + } + return false; + } + } else if(!reqLREntry.getValue().equals(requestedLocalResource)) { if (LOG.isDebugEnabled()) { LOG.debug("Cannot match container: Attempting to use same target resource name: " - + srcLREntry.getKey() + + reqLREntry.getKey() + ", but with different source resources. 
Already localized: " - + srcLREntry.getValue() + ", requested: " + requestedLocalResource); - } - return false; - } - } - for (Entry additionalLREntry : reqLRsCopy.entrySet()) { - LocalResource lr = additionalLREntry.getValue(); - if (EnumSet.of(LocalResourceType.ARCHIVE, LocalResourceType.PATTERN).contains(lr.getType())) { - if (LOG.isDebugEnabled()) { - LOG.debug("Cannot match container: Additional local resource needed is not of type FILE" - + ", resourceName: " + additionalLREntry.getKey() - + ", resourceDetails: " + additionalLREntry); + + requestedLocalResource + ", requested: " + reqLREntry.getValue()); } return false; } @@ -161,24 +158,14 @@ private static boolean isSuperSet(Map srcMap, Map matchMap, for (Entry oEntry : matchMap.entrySet()) { K oKey = oEntry.getKey(); V oVal = oEntry.getValue(); - if (srcMap.containsKey(oKey)) { - if (!oVal.equals(srcMap.get(oKey))) { - if (LOG.isDebugEnabled()) { - LOG.debug("Incompatible container context" + V srcVal = srcMap.get(oKey); + if (!oVal.equals(srcVal)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Incompatible container context" + ", matchInfo=" + matchInfo + ", thisKey=" + oKey - + ", thisVal=" + srcMap.get(oKey) + + ", thisVal=" + srcVal + ", otherVal=" + oVal); - } - return false; - } - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Incompatible container context" - + ", matchInfo=" + matchInfo - + ", thisKey=" + oKey - + ", thisVal=null" - + ", otherVal=" + oVal); } return false; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerLauncherContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerLauncherContextImpl.java index 7e68675b24..26637967ff 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerLauncherContextImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/ContainerLauncherContextImpl.java @@ -16,10 +16,12 @@ import javax.annotation.Nullable; -import com.google.common.base.Preconditions; +import java.util.Objects; + import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.tez.common.TezUtilsInternal; +import org.apache.tez.common.counters.DAGCounter; import org.apache.tez.dag.api.UserPayload; import org.apache.tez.dag.app.dag.event.DAGAppMasterEventType; import org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError; @@ -52,9 +54,9 @@ public class ContainerLauncherContextImpl implements ContainerLauncherContext { public ContainerLauncherContextImpl(AppContext appContext, ContainerLauncherManager containerLauncherManager, TaskCommunicatorManagerInterface tal, UserPayload initialUserPayload, int containerLauncherIndex) { - Preconditions.checkNotNull(appContext, "AppContext cannot be null"); - Preconditions.checkNotNull(appContext, "ContainerLauncherManager cannot be null"); - Preconditions.checkNotNull(tal, "TaskCommunicator cannot be null"); + Objects.requireNonNull(appContext, "AppContext cannot be null"); + Objects.requireNonNull(appContext, "ContainerLauncherManager cannot be null"); + Objects.requireNonNull(tal, "TaskCommunicator cannot be null"); this.context = appContext; this.containerLauncherManager = containerLauncherManager; this.tal = tal; @@ -64,13 +66,13 @@ public ContainerLauncherContextImpl(AppContext appContext, ContainerLauncherMana @Override public void containerLaunched(ContainerId containerId) { + context.getCurrentDAG().incrementDagCounter(DAGCounter.TOTAL_CONTAINER_LAUNCH_COUNT, 1); context.getEventHandler().handle( new 
AMContainerEventLaunched(containerId)); ContainerLaunchedEvent lEvt = new ContainerLaunchedEvent( containerId, context.getClock().getTime(), context.getApplicationAttemptId()); context.getHistoryHandler().handle(new DAGHistoryEvent( null, lEvt)); - } @Override @@ -140,7 +142,7 @@ public Object getTaskCommunicatorMetaInfo(String taskCommName) { @Override public void reportError(ServicePluginError servicePluginError, String message, DagInfo dagInfo) { - Preconditions.checkNotNull(servicePluginError, "ServiceError must be specified"); + Objects.requireNonNull(servicePluginError, "ServiceError must be specified"); containerLauncherManager.reportError(containerLauncherIndex, servicePluginError, message, dagInfo); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java index c4b8df0fd8..4172a5a368 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java @@ -18,8 +18,9 @@ package org.apache.tez.dag.app; -import static com.google.common.base.Preconditions.checkNotNull; + +import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -35,6 +36,7 @@ import java.util.Arrays; import java.util.Calendar; import java.util.Collections; +import java.util.Date; import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; @@ -55,8 +57,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantReadWriteLock; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.util.Objects; import com.google.common.collect.BiMap; import com.google.common.collect.HashBiMap; @@ -66,12 +67,16 @@ import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.Options; import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.tez.Utils; import org.apache.tez.client.CallerContext; import org.apache.tez.client.TezClientUtils; +import org.apache.tez.common.ReflectionUtils; import org.apache.tez.common.TezUtils; import org.apache.tez.dag.api.NamedEntityDescriptor; import org.apache.tez.dag.api.SessionNotRunning; import org.apache.tez.dag.api.UserPayload; +import org.apache.tez.dag.api.records.DAGProtos; import org.apache.tez.dag.api.records.DAGProtos.AMPluginDescriptorProto; import org.apache.tez.dag.api.records.DAGProtos.ConfigurationProto; import org.apache.tez.dag.api.records.DAGProtos.TezNamedEntityDescriptorProto; @@ -81,8 +86,6 @@ import org.apache.tez.dag.app.dag.event.DAGEventInternalError; import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag; import org.apache.tez.dag.history.events.DAGRecoveredEvent; -import org.apache.tez.dag.records.TezTaskAttemptID; -import org.apache.tez.dag.records.TezTaskID; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -103,7 +106,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.LocalResource; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; @@ -112,13 +114,16 @@ import org.apache.hadoop.yarn.util.ConverterUtils; import 
org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; import org.apache.hadoop.yarn.util.SystemClock; +import org.apache.log4j.helpers.ThreadLocalMap; import org.apache.tez.common.AsyncDispatcher; import org.apache.tez.common.AsyncDispatcherConcurrent; import org.apache.tez.common.GcTimeUpdater; +import org.apache.tez.common.TezClassLoader; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezConverterUtils; import org.apache.tez.common.TezUtilsInternal; import org.apache.tez.common.VersionInfo; +import org.apache.tez.common.counters.DAGCounter; import org.apache.tez.common.counters.Limits; import org.apache.tez.common.security.ACLManager; import org.apache.tez.common.security.JobTokenIdentifier; @@ -131,7 +136,6 @@ import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.api.client.DAGClientHandler; import org.apache.tez.dag.api.client.DAGClientServer; -import org.apache.tez.dag.api.records.DAGProtos; import org.apache.tez.dag.api.records.DAGProtos.DAGPlan; import org.apache.tez.dag.api.records.DAGProtos.PlanLocalResourcesProto; import org.apache.tez.dag.api.records.DAGProtos.VertexPlan; @@ -178,12 +182,14 @@ import org.apache.tez.dag.history.events.DAGSubmittedEvent; import org.apache.tez.dag.history.utils.DAGUtils; import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.dag.records.TezVertexID; -import org.apache.tez.dag.utils.Graph; import org.apache.tez.dag.utils.RelocalizationUtils; import org.apache.tez.dag.utils.Simple2LevelVersionComparator; import org.apache.tez.hadoop.shim.HadoopShim; import org.apache.tez.hadoop.shim.HadoopShimsLoader; +import org.apache.tez.runtime.hook.TezDAGHook; +import org.apache.tez.util.LoggingUtils; import org.apache.tez.util.TezMxBeanResourceCalculator; import org.codehaus.jettison.json.JSONException; import org.slf4j.Logger; @@ -191,8 +197,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Maps; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; @@ -225,9 +230,6 @@ public class DAGAppMaster extends AbstractService { * Priority of the DAGAppMaster shutdown hook. */ public static final int SHUTDOWN_HOOK_PRIORITY = 30; - private static final Joiner PATH_JOINER = Joiner.on('/'); - - private static Pattern sanitizeLabelPattern = Pattern.compile("[:\\-\\W]+"); @VisibleForTesting static final String INVALID_SESSION_ERR_MSG = "Initial application attempt in session mode failed. 
" @@ -265,6 +267,7 @@ public class DAGAppMaster extends AbstractService { private DagEventDispatcher dagEventDispatcher; private VertexEventDispatcher vertexEventDispatcher; private TaskSchedulerManager taskSchedulerManager; + private DAGAppMasterReadinessService appMasterReadinessService; private WebUIService webUIService; private HistoryEventHandler historyEventHandler; private final Map amResources = new HashMap(); @@ -303,8 +306,7 @@ public class DAGAppMaster extends AbstractService { private Path currentRecoveryDataDir; private Path tezSystemStagingDir; private FileSystem recoveryFS; - - private ExecutorService rawExecutor; + private ListeningExecutorService execService; // TODO May not need to be a bidi map @@ -315,7 +317,6 @@ public class DAGAppMaster extends AbstractService { /** * set of already executed dag names. */ - Set dagNames = new HashSet(); Set dagIDs = new HashSet(); protected boolean isLastAMRetry = false; @@ -335,13 +336,15 @@ public class DAGAppMaster extends AbstractService { private String clientVersion; private boolean versionMismatch = false; private String versionMismatchDiagnostics; - + private ResourceCalculatorProcessTree cpuPlugin; private GcTimeUpdater gcPlugin; // must be LinkedHashMap to preserve order of service addition Map services = new LinkedHashMap(); + private ThreadLocalMap mdcContext; + private TezDAGHook[] hooks = {}; public DAGAppMaster(ApplicationAttemptId applicationAttemptId, ContainerId containerId, String nmHost, int nmPort, int nmHttpPort, @@ -349,6 +352,7 @@ public DAGAppMaster(ApplicationAttemptId applicationAttemptId, String [] localDirs, String[] logDirs, String clientVersion, Credentials credentials, String jobUserName, AMPluginDescriptorProto pluginDescriptorProto) { super(DAGAppMaster.class.getName()); + this.mdcContext = LoggingUtils.setupLog4j(); this.clock = clock; this.startTime = clock.getTime(); this.appSubmitTime = appSubmitTime; @@ -375,22 +379,20 @@ public DAGAppMaster(ApplicationAttemptId applicationAttemptId, this.containerID.toString(), this.appMasterUgi.getShortUserName()); LOG.info("Created DAGAppMaster for application " + applicationAttemptId - + ", versionInfo=" + dagVersionInfo.toString()); - + + ", versionInfo=" + dagVersionInfo); + TezCommonUtils.logCredentials(LOG, this.appMasterUgi.getCredentials(), "am"); } // Pull this WebAppUtils function into Tez until YARN-4186 - public static String getRunningLogURL(String nodeHttpAddress, + private static String getRunningLogURL(String nodeHttpAddress, String containerId, String user) { - if (nodeHttpAddress == null || nodeHttpAddress.isEmpty() - || containerId == null || containerId.isEmpty() || user == null - || user.isEmpty()) { + if (containerId.isEmpty() || user == null | user.isEmpty()) { return null; } - return PATH_JOINER.join(nodeHttpAddress, "node", "containerlogs", + return String.format("%s/node/containerlogs/%s/%s", nodeHttpAddress, containerId, user); } - + private void initResourceCalculatorPlugins() { Class clazz = amConf.getClass( TezConfiguration.TEZ_TASK_RESOURCE_CALCULATOR_PROCESS_TREE_CLASS, @@ -405,10 +407,10 @@ private void initResourceCalculatorPlugins() { pid = processName.split("@")[0]; } cpuPlugin = ResourceCalculatorProcessTree.getResourceCalculatorProcessTree(pid, clazz, amConf); - + gcPlugin = new GcTimeUpdater(null); } - + private long getAMCPUTime() { if (cpuPlugin != null) { cpuPlugin.updateProcessTree(); @@ -425,7 +427,7 @@ private long getAMGCTime() { } @Override - public synchronized void serviceInit(final Configuration conf) throws 
Exception { + protected void serviceInit(final Configuration conf) throws Exception { this.amConf = conf; initResourceCalculatorPlugins(); @@ -510,8 +512,7 @@ public synchronized void serviceInit(final Configuration conf) throws Exception recoveryEnabled = conf.getBoolean(TezConfiguration.DAG_RECOVERY_ENABLED, TezConfiguration.DAG_RECOVERY_ENABLED_DEFAULT); - clientRpcServer = new DAGClientServer(clientHandler, appAttemptID, recoveryFS); - addIfService(clientRpcServer, true); + initClientRpcServer(); taskHeartbeatHandler = createTaskHeartbeatHandler(context, conf); addIfService(taskHeartbeatHandler, true); @@ -562,14 +563,14 @@ public synchronized void serviceInit(final Configuration conf) throws Exception dispatcher.register(TaskEventType.class, new TaskEventDispatcher()); dispatcher.register(TaskAttemptEventType.class, new TaskAttemptEventDispatcher()); } else { - int concurrency = conf.getInt(TezConfiguration.TEZ_AM_CONCURRENT_DISPATCHER_CONCURRENCY, + int concurrency = conf.getInt(TezConfiguration.TEZ_AM_CONCURRENT_DISPATCHER_CONCURRENCY, TezConfiguration.TEZ_AM_CONCURRENT_DISPATCHER_CONCURRENCY_DEFAULT); AsyncDispatcherConcurrent sharedDispatcher = dispatcher.registerAndCreateDispatcher( TaskEventType.class, new TaskEventDispatcher(), "TaskAndAttemptEventThread", concurrency); dispatcher.registerWithExistingDispatcher(TaskAttemptEventType.class, new TaskAttemptEventDispatcher(), sharedDispatcher); } - + // register other delegating dispatchers dispatcher.registerAndCreateDispatcher(SpeculatorEventType.class, new SpeculatorEventHandler(), "Speculator"); @@ -578,9 +579,7 @@ public synchronized void serviceInit(final Configuration conf) throws Exception this.webUIService = new WebUIService(context); addIfService(webUIService, false); } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Web UI Service is not enabled."); - } + LOG.debug("Web UI Service is not enabled."); } this.taskSchedulerManager = createTaskSchedulerManager(taskSchedulerDescriptors); @@ -594,6 +593,8 @@ public synchronized void serviceInit(final Configuration conf) throws Exception taskSchedulerManager); addIfServiceDependency(taskSchedulerManager, clientRpcServer); + appMasterReadinessService = createAppMasterReadinessService(); + this.containerLauncherManager = createContainerLauncherManager(containerLauncherDescriptors, isLocal); addIfService(containerLauncherManager, true); @@ -608,23 +609,25 @@ public synchronized void serviceInit(final Configuration conf) throws Exception if (!versionMismatch) { if (isSession) { - FileInputStream sessionResourcesStream = null; - try { - sessionResourcesStream = new FileInputStream( - new File(workingDirectory, TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME)); + try (BufferedInputStream sessionResourcesStream = + new BufferedInputStream( + new FileInputStream(new File(workingDirectory, + TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME)))) { PlanLocalResourcesProto amLocalResourceProto = PlanLocalResourcesProto .parseDelimitedFrom(sessionResourcesStream); - amResources.putAll(DagTypeConverters.convertFromPlanLocalResources(amLocalResourceProto)); - } finally { - if (sessionResourcesStream != null) { - sessionResourcesStream.close(); - } + amResources.putAll(DagTypeConverters + .convertFromPlanLocalResources(amLocalResourceProto)); } } } - rawExecutor = Executors.newCachedThreadPool(new ThreadFactoryBuilder().setDaemon(true) - .setNameFormat("App Shared Pool - " + "#%d").build()); + int threadCount = conf.getInt(TezConfiguration.TEZ_AM_DAG_APPCONTEXT_THREAD_COUNT_LIMIT, + 
TezConfiguration.TEZ_AM_DAG_APPCONTEXT_THREAD_COUNT_LIMIT_DEFAULT); + // NOTE: LinkedBlockingQueue does not have a capacity Limit and can thus + // occupy large memory chunks when numerous Runables are pending for execution + ExecutorService rawExecutor = + Executors.newFixedThreadPool(threadCount, new ThreadFactoryBuilder() + .setDaemon(true).setNameFormat("App Shared Pool - #%d").build()); execService = MoreExecutors.listeningDecorator(rawExecutor); initServices(conf); @@ -649,6 +652,11 @@ public synchronized void serviceInit(final Configuration conf) throws Exception } } + protected void initClientRpcServer() { + clientRpcServer = new DAGClientServer(clientHandler, appAttemptID, recoveryFS); + addIfService(clientRpcServer, true); + } + @VisibleForTesting protected DAGAppMasterShutdownHandler createShutdownHandler() { return new DAGAppMasterShutdownHandler(); @@ -662,11 +670,20 @@ protected TaskSchedulerManager createTaskSchedulerManager( taskSchedulerDescriptors, isLocal, hadoopShim); } + @VisibleForTesting + protected DAGAppMasterReadinessService createAppMasterReadinessService() { + DAGAppMasterReadinessService service = + new DAGAppMasterReadinessService(DAGAppMasterReadinessService.class.getName()); + addIfService(service, false); + addIfServiceDependency(service, taskSchedulerManager); + return service; + } + @VisibleForTesting protected ContainerSignatureMatcher createContainerSignatureMatcher() { return new ContainerContextMatcher(); } - + @VisibleForTesting protected AsyncDispatcher createDispatcher() { return new AsyncDispatcher("Central"); @@ -685,7 +702,7 @@ protected void sysexit() { System.exit(0); } } - + @VisibleForTesting protected TaskSchedulerManager getTaskSchedulerManager() { return taskSchedulerManager; @@ -694,9 +711,8 @@ protected TaskSchedulerManager getTaskSchedulerManager() { private void handleInternalError(String errDiagnosticsPrefix, String errDiagDagEvent) { state = DAGAppMasterState.ERROR; if (currentDAG != null) { - _updateLoggers(currentDAG, "_post"); - String errDiagnostics = errDiagnosticsPrefix + ". Aborting dag: " + currentDAG.getID(); - LOG.info(errDiagnostics); + updateLoggers(currentDAG, "_post"); + LOG.info(errDiagnosticsPrefix + ". 
Aborting dag: " + currentDAG.getID()); // Inform the current DAG about the error sendEvent(new DAGEventInternalError(currentDAG.getID(), errDiagDagEvent)); } else { @@ -755,25 +771,29 @@ protected synchronized void handle(DAGAppMasterEvent event) { "DAGAppMaster Internal Error occurred"); break; case DAG_FINISHED: + for (TezDAGHook hook : hooks) { + hook.stop(); + } DAGAppMasterEventDAGFinished finishEvt = (DAGAppMasterEventDAGFinished) event; String timeStamp = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(Calendar.getInstance().getTime()); - System.err.println(timeStamp + " Completed Dag: " + finishEvt.getDAGId().toString()); - System.out.println(timeStamp + " Completed Dag: " + finishEvt.getDAGId().toString()); + System.err.println(timeStamp + " Completed Dag: " + finishEvt.getDAGId()); + System.out.println(timeStamp + " Completed Dag: " + finishEvt.getDAGId()); + + currentDAG.onFinish(); + if (!isSession) { LOG.info("Not a session, AM will unregister as DAG has completed"); this.taskSchedulerManager.setShouldUnregisterFlag(); - _updateLoggers(currentDAG, "_post"); + updateLoggers(currentDAG, "_post"); setStateOnDAGCompletion(); - LOG.info("Shutting down on completion of dag:" + - finishEvt.getDAGId().toString()); + LOG.info("Shutting down on completion of dag:" + finishEvt.getDAGId()); shutdownHandler.shutdown(); } else { - LOG.info("DAG completed, dagId=" - + finishEvt.getDAGId().toString() - + ", dagState=" + finishEvt.getDAGState()); + LOG.info("DAG completed, dagId=" + finishEvt.getDAGId() + ", dagState=" + + finishEvt.getDAGState()); lastDAGCompletionTime = clock.getTime(); - _updateLoggers(currentDAG, "_post"); + updateLoggers(currentDAG, "_post"); if (this.historyEventHandler.hasRecoveryFailed()) { String recoveryErrorMsg = "Recovery had a fatal error, shutting down session after" + " DAG completion"; @@ -865,10 +885,6 @@ protected synchronized void handle(DAGAppMasterEvent event) { taskCommunicatorManager.dagComplete(cleanupEvent.getDag()); nodes.dagComplete(cleanupEvent.getDag()); containers.dagComplete(cleanupEvent.getDag()); - TezTaskAttemptID.clearCache(); - TezTaskID.clearCache(); - TezVertexID.clearCache(); - TezDAGID.clearCache(); LOG.info("Completed cleanup for DAG: name=" + cleanupEvent.getDag().getName() + ", with id=" + cleanupEvent.getDag().getID()); synchronized (idleStateLock) { @@ -888,9 +904,10 @@ protected synchronized void handle(DAGAppMasterEvent event) { } } - private void _updateLoggers(DAG dag, String appender) { + private void updateLoggers(DAG dag, String appender) { try { - TezUtilsInternal.updateLoggers(dag.getID().toString() + appender); + TezUtilsInternal.updateLoggers(dag.getConf(), dag.getID().toString() + appender, + LoggingUtils.getPatternForAM(dag.getConf())); } catch (FileNotFoundException e) { LOG.warn("Unable to update the logger. 
Continue with the old logger", e ); } @@ -923,6 +940,15 @@ public void handle(DAGAppMasterEvent event) { protected class DAGAppMasterShutdownHandler { private AtomicBoolean shutdownHandled = new AtomicBoolean(false); private long sleepTimeBeforeExit = TezConstants.TEZ_DAG_SLEEP_TIME_BEFORE_EXIT; + private long shutdownTime; + + public Date getShutdownTime() { + return new Date(shutdownTime); + } + + public void setShutdownTime(long shutdownTime) { + this.shutdownTime = shutdownTime; + } void setSleepTimeBeforeExit(long sleepTimeBeforeExit) { this.sleepTimeBeforeExit = sleepTimeBeforeExit; @@ -941,6 +967,7 @@ public void shutdown(boolean now) { synchronized (shutdownHandlerRunning) { shutdownHandlerRunning.set(true); + setShutdownTime(System.currentTimeMillis()); } LOG.info("Handling DAGAppMaster shutdown"); @@ -1020,117 +1047,49 @@ DAGImpl createDAG(DAGPlan dagPB, TezDAGID dagId) { // TODO Does this move to the client in case of work-preserving recovery. TokenCache.setSessionToken(sessionToken, dagCredentials); - + TezCommonUtils.logCredentials(LOG, dagCredentials, "newDag"); // create single dag DAGImpl newDag = new DAGImpl(dagId, amConf, dagPB, dispatcher.getEventHandler(), taskCommunicatorManager, dagCredentials, clock, appMasterUgi.getShortUserName(), - taskHeartbeatHandler, context); + taskHeartbeatHandler, context).setLogDirs(logDirs); try { if (LOG.isDebugEnabled()) { - LOG.debug("JSON dump for submitted DAG, dagId=" + dagId.toString() - + ", json=" - + DAGUtils.generateSimpleJSONPlan(dagPB).toString()); + LOG.debug("JSON dump for submitted DAG, dagId=" + dagId + ", json=" + + DAGUtils.generateSimpleJSONPlan(dagPB)); } } catch (JSONException e) { LOG.warn("Failed to generate json for DAG", e); } - generateDAGVizFile(dagId, dagPB, logDirs); - writePBTextFile(newDag); + writeDebugArtifacts(dagPB, newDag); return newDag; - } // end createDag() - - String getShortClassName(String className) { - int pos = className.lastIndexOf("."); - if (pos != -1 && pos < className.length()-1) { - return className.substring(pos+1); - } - return className; - } - - - private String sanitizeLabelForViz(String label) { - Matcher m = sanitizeLabelPattern.matcher(label); - return m.replaceAll("_"); } - private void generateDAGVizFile(TezDAGID dagId, DAGPlan dagPB, String[] logDirs) { - Graph graph = new Graph(sanitizeLabelForViz(dagPB.getName())); - - for (VertexPlan v : dagPB.getVertexList()) { - String nodeLabel = sanitizeLabelForViz(v.getName()) - + "[" + getShortClassName(v.getProcessorDescriptor().getClassName() + "]"); - Graph.Node n = graph.newNode(sanitizeLabelForViz(v.getName()), nodeLabel); - for (DAGProtos.RootInputLeafOutputProto input : v.getInputsList()) { - Graph.Node inputNode = graph.getNode(sanitizeLabelForViz(v.getName()) - + "_" + sanitizeLabelForViz(input.getName())); - inputNode.setLabel(sanitizeLabelForViz(v.getName()) - + "[" + sanitizeLabelForViz(input.getName()) + "]"); - inputNode.setShape("box"); - inputNode.addEdge(n, "Input" - + " [inputClass=" + getShortClassName(input.getIODescriptor().getClassName()) - + ", initializer=" + getShortClassName(input.getControllerDescriptor().getClassName()) + "]"); - } - for (DAGProtos.RootInputLeafOutputProto output : v.getOutputsList()) { - Graph.Node outputNode = graph.getNode(sanitizeLabelForViz(v.getName()) - + "_" + sanitizeLabelForViz(output.getName())); - outputNode.setLabel(sanitizeLabelForViz(v.getName()) - + "[" + sanitizeLabelForViz(output.getName()) + "]"); - outputNode.setShape("box"); - n.addEdge(outputNode, "Output" - + " 
[outputClass=" + getShortClassName(output.getIODescriptor().getClassName()) - + ", committer=" + getShortClassName(output.getControllerDescriptor().getClassName()) + "]"); - } - } - - for (DAGProtos.EdgePlan e : dagPB.getEdgeList()) { - - Graph.Node n = graph.getNode(sanitizeLabelForViz(e.getInputVertexName())); - n.addEdge(graph.getNode(sanitizeLabelForViz(e.getOutputVertexName())), - "[" - + "input=" + getShortClassName(e.getEdgeSource().getClassName()) - + ", output=" + getShortClassName(e.getEdgeDestination().getClassName()) - + ", dataMovement=" + e.getDataMovementType().name().trim() - + ", schedulingType=" + e.getSchedulingType().name().trim() + "]"); - } - - String outputFile = ""; - if (logDirs != null && logDirs.length != 0) { - outputFile += logDirs[0]; - outputFile += File.separator; - } - outputFile += dagId.toString() + ".dot"; - - try { - LOG.info("Generating DAG graphviz file" - + ", dagId=" + dagId.toString() - + ", filePath=" + outputFile); - graph.save(outputFile); - } catch (Exception e) { - LOG.warn("Error occurred when trying to save graph structure" - + " for dag " + dagId.toString(), e); + private void writeDebugArtifacts(DAGPlan dagPB, DAGImpl newDag) { + boolean debugArtifacts = + newDag.getConf().getBoolean(TezConfiguration.TEZ_GENERATE_DEBUG_ARTIFACTS, + TezConfiguration.TEZ_GENERATE_DEBUG_ARTIFACTS_DEFAULT); + if (debugArtifacts) { + Utils.generateDAGVizFile(newDag, dagPB, logDirs, newDag.getDAGScheduler()); + writePBTextFile(newDag); } } private void writePBTextFile(DAG dag) { - if (dag.getConf().getBoolean(TezConfiguration.TEZ_GENERATE_DEBUG_ARTIFACTS, - TezConfiguration.TEZ_GENERATE_DEBUG_ARTIFACTS_DEFAULT)) { + String logFile = logDirs[new Random().nextInt(logDirs.length)] + File.separatorChar + + dag.getID() + "-" + TezConstants.TEZ_PB_PLAN_TEXT_NAME; - String logFile = logDirs[new Random().nextInt(logDirs.length)] + File.separatorChar + - dag.getID().toString() + "-" + TezConstants.TEZ_PB_PLAN_TEXT_NAME; - - LOG.info("Writing DAG plan to: " + logFile); - File outFile = new File(logFile); - try { - PrintWriter printWriter = new PrintWriter(outFile, "UTF-8"); - printWriter.println(TezUtilsInternal.convertDagPlanToString(dag.getJobPlan())); - printWriter.close(); - } catch (IOException e) { - LOG.warn("Failed to write TEZ_PLAN to " + outFile.toString(), e); - } + LOG.info("Writing DAG plan to: " + logFile); + File outFile = new File(logFile); + try { + PrintWriter printWriter = new PrintWriter(outFile, "UTF-8"); + printWriter.println(TezUtilsInternal.convertDagPlanToString(dag.getJobPlan())); + printWriter.close(); + } catch (IOException e) { + LOG.warn("Failed to write TEZ_PLAN to " + outFile, e); } } @@ -1271,7 +1230,7 @@ public List getDiagnostics() { } public float getProgress() { - if (isSession && state.equals(DAGAppMasterState.IDLE)) { + if (isSession && getState().equals(DAGAppMasterState.IDLE)) { return 0.0f; } if(currentDAG != null) { @@ -1363,11 +1322,12 @@ public Void run() throws Exception { public String submitDAGToAppMaster(DAGPlan dagPlan, Map additionalResources) throws TezException { + appMasterReadinessService.waitToBeReady(); + if (sessionStopped.get()) { throw new SessionNotRunning("AM unable to accept new DAG submissions." 
+ " In the process of shutting down"); } - // dag is in cleanup when dag state is completed but AM state is still RUNNING synchronized (idleStateLock) { while (currentDAG != null && currentDAG.isComplete() && state == DAGAppMasterState.RUNNING) { @@ -1398,9 +1358,8 @@ public String submitDAGToAppMaster(DAGPlan dagPlan, // the job user's UGI context LOG.info("Starting DAG submitted via RPC: " + dagPlan.getName()); - if (LOG.isDebugEnabled()) { - LOG.debug("Invoked with additional local resources: " + additionalResources); - } + LOG.debug("Invoked with additional local resources: {}", additionalResources); + if (!dagPlan.getName().startsWith(TezConstants.TEZ_PREWARM_DAG_NAME_PREFIX)) { submittedDAGs.incrementAndGet(); } @@ -1418,7 +1377,7 @@ public void tryKillDAG(DAG dag, String message) throws TezException { } dispatcher.getEventHandler().handle(new DAGEventTerminateDag(dag.getID(), DAGTerminationCause.DAG_KILL, message)); } - + private Map getAdditionalLocalResourceDiff( DAG dag, Map additionalResources) throws TezException { if (additionalResources == null) { @@ -1479,7 +1438,7 @@ public Boolean run() throws Exception { } private static Path findLocalFileForResource(String fileName) { - URL localResource = ClassLoader.getSystemClassLoader().getResource(fileName); + URL localResource = TezClassLoader.getInstance().getResource(fileName); if (localResource == null) return null; return new Path(localResource.getPath()); } @@ -1531,7 +1490,7 @@ private class RunningAppContext implements AppContext { private volatile String queueName; public RunningAppContext(Configuration config) { - checkNotNull(config, "config is null"); + Objects.requireNonNull(config, "config is null"); this.conf = config; this.eventHandler = dispatcher.getEventHandler(); } @@ -1570,7 +1529,7 @@ public long getStartTime() { public DAG getCurrentDAG() { return dag; } - + @Override public ListeningExecutorService getExecService() { return execService; @@ -1735,9 +1694,11 @@ public HadoopShim getHadoopShim() { @Override public Map getApplicationACLs() { - if (getServiceState() != STATE.STARTED) { + STATE serviceState = getServiceState(); + if (serviceState != STATE.STARTED) { throw new TezUncheckedException( - "Cannot get ApplicationACLs before all services have started"); + "Cannot get ApplicationACLs before all services have started, The current service state is " + serviceState + + "." 
+ getShutdownTimeString()); } return taskSchedulerManager.getApplicationAcls(); } @@ -1757,7 +1718,7 @@ public TezDAGID getCurrentDAGID() { @Override public void setDAG(DAG dag) { - Preconditions.checkNotNull(dag, "dag is null"); + Objects.requireNonNull(dag, "dag is null"); try { wLock.lock(); this.dag = dag; @@ -1771,7 +1732,7 @@ public void setDAG(DAG dag) { public long getCumulativeCPUTime() { return getAMCPUTime(); } - + @Override public long getCumulativeGCTime() { return getAMGCTime(); @@ -1798,6 +1759,13 @@ public void setQueueName(String queueName) { } } + private String getShutdownTimeString() { + if (shutdownHandler != null && shutdownHandler.getShutdownTime() != null) { + return " The shutdown hook started at " + shutdownHandler.getShutdownTime(); + } + return ""; + } + private static class ServiceWithDependency implements ServiceStateChangeListener { ServiceWithDependency(Service service) { this.service = service; @@ -1914,13 +1882,12 @@ private void notifyDependentServices() { } } - void startServices(){ + void startServices() { try { Throwable firstError = null; List threads = new ArrayList(); - if(LOG.isDebugEnabled()) { - LOG.debug("Begin parallel start"); - } + LOG.debug("Begin parallel start"); + for(ServiceWithDependency sd : services.values()) { // start the service. If this fails that service // will be stopped and an exception raised @@ -1944,9 +1911,7 @@ void startServices(){ if(firstError != null) { throw ServiceStateException.convert(firstError); } - if(LOG.isDebugEnabled()) { - LOG.debug("End parallel start"); - } + LOG.debug("End parallel start"); } catch (InterruptedException e) { e.printStackTrace(); } @@ -1954,25 +1919,25 @@ void startServices(){ void initServices(Configuration conf) { for (ServiceWithDependency sd : services.values()) { - if (LOG.isDebugEnabled()) { - LOG.debug("Initing service : " + sd.service); - } + LOG.debug("Initing service : {}", sd.service); sd.service.init(conf); } } void stopServices() { + Exception firstException = null; // stop in reverse order of start + if (currentDAG != null) { + currentDAG.onFinish(); + } List serviceList = new ArrayList(services.size()); for (ServiceWithDependency sd : services.values()) { serviceList.add(sd.service); } - Exception firstException = null; + for (int i = services.size() - 1; i >= 0; i--) { Service service = serviceList.get(i); - if (LOG.isDebugEnabled()) { - LOG.debug("Stopping service : " + service); - } + LOG.debug("Stopping service : {}", service); Exception ex = ServiceOperations.stopQuietly(service); if (ex != null && firstException == null) { LOG.warn("Failed to stop service, name=" + service.getName(), ex); @@ -1993,10 +1958,7 @@ private DAGRecoveryData recoverDAG() throws IOException, TezException { LOG.info("Recovering data from previous attempts" + ", currentAttemptId=" + this.appAttemptID.getAttemptId()); this.state = DAGAppMasterState.RECOVERING; - RecoveryParser recoveryParser = new RecoveryParser( - this, recoveryFS, recoveryDataDir, appAttemptID.getAttemptId()); - DAGRecoveryData recoveredDAGData = recoveryParser.parseRecoveryData(); - return recoveredDAGData; + return parseDAGFromRecoveryData(); } } finally { hadoopShim.clearHadoopCallerContext(); @@ -2004,10 +1966,30 @@ private DAGRecoveryData recoverDAG() throws IOException, TezException { } return null; } - - @Override - public synchronized void serviceStart() throws Exception { + private DAGRecoveryData parseDAGFromRecoveryData() throws IOException { + RecoveryParser recoveryParser = new RecoveryParser( + this, recoveryFS, 
recoveryDataDir, appAttemptID.getAttemptId()); + DAGRecoveryData recoveredDAGData = recoveryParser.parseRecoveryData(); + + /** + * Parsed recovery data can be null in scenarios where the AM shut down prematurely during the first attempt + * due to some fatal error. If that happens, the recovery stream is not closed and no data is flushed to the file system, + * so in subsequent attempts of the application, recovery returns null instead of failing the DAG. + * When this config is enabled, an IOException is thrown for such cases; it is assumed that the caller will catch + * the IOException and fail the DAG, which is what happens currently. JIRA: https://issues.apache.org/jira/browse/TEZ-4474 + */ + if (Objects.isNull(recoveredDAGData) && amConf.getBoolean( + TezConfiguration.TEZ_AM_FAILURE_ON_MISSING_RECOVERY_DATA, + TezConfiguration.TEZ_AM_FAILURE_ON_MISSING_RECOVERY_DATA_DEFAULT)) { + throw new IOException(String.format("Found nothing to recover in currentAttemptId=%s from recovery data dir=%s", + this.appAttemptID.getAttemptId(), this.recoveryDataDir)); + } + return recoveredDAGData; + } + + @Override + public void serviceStart() throws Exception { //start all the components startServices(); super.serviceStart(); @@ -2047,8 +2029,19 @@ public synchronized void serviceStart() throws Exception { return; } + DAGPlan dagPlan = null; if (!isSession) { LOG.info("In Non-Session mode."); + dagPlan = readDAGPlanFile(); + if (hasConcurrentEdge(dagPlan)) { + // Currently a DAG with concurrent edge is deemed unrecoverable + // (run from scratch) on AM failover. Proper AM failover for DAG with + // concurrent edge is pending TEZ-4017 + if (recoveredDAGData != null) { + LOG.warn("Ignoring recoveredDAGData for a recovered DAG with concurrent edge."); + recoveredDAGData = null; + } + } } else { LOG.info("In Session mode. Waiting for DAG over RPC"); this.state = DAGAppMasterState.IDLE; @@ -2079,7 +2072,7 @@ public synchronized void serviceStart() throws Exception { + ", state=" + (recoveredDAGData.dagState == null ? "null" : recoveredDAGData.dagState) + ", failureReason=" + recoveredDAGData.reason); - _updateLoggers(recoveredDAGData.recoveredDAG, ""); + updateLoggers(recoveredDAGData.recoveredDAG, ""); if (recoveredDAGData.nonRecoverable) { addDiagnostic("DAG " + recoveredDAGData.recoveredDagID + " can not be recovered due to " + recoveredDAGData.reason); @@ -2114,7 +2107,7 @@ public synchronized void serviceStart() throws Exception { } } else { LOG.info("Found DAG to recover, dagId=" + recoveredDAGData.recoveredDAG.getID()); - _updateLoggers(recoveredDAGData.recoveredDAG, ""); + updateLoggers(recoveredDAGData.recoveredDAG, ""); DAGRecoveredEvent dagRecoveredEvent = new DAGRecoveredEvent(this.appAttemptID, recoveredDAGData.recoveredDAG.getID(), recoveredDAGData.recoveredDAG.getName(), recoveredDAGData.recoveredDAG.getUserName(), this.clock.getTime(), this.containerLogs); @@ -2123,13 +2116,17 @@ public synchronized void serviceStart() throws Exception { DAGEventRecoverEvent recoverDAGEvent = new DAGEventRecoverEvent( recoveredDAGData.recoveredDAG.getID(), recoveredDAGData); dagEventDispatcher.handle(recoverDAGEvent); + // If we reach here, then we have a recoverable DAG and we need to + // reinitialize the vertex services including speculators. 
+ currentDAG.onStart(); this.state = DAGAppMasterState.RUNNING; } } else { if (!isSession) { // No dag recovered - in non-session, just restart the original DAG dagCounter.set(0); - startDAG(); + assert(dagPlan != null); + startDAG(dagPlan, null); } } @@ -2184,59 +2181,60 @@ public void serviceStop() throws Exception { if (isSession) { sessionStopped.set(true); } - synchronized (this) { - if (this.dagSubmissionTimer != null) { - this.dagSubmissionTimer.cancel(); - } - if (this.clientAMHeartBeatTimeoutService != null) { - this.clientAMHeartBeatTimeoutService.shutdownNow(); - } - // release all the held containers before stop services TEZ-2687 - initiateStop(); - stopServices(); - - // Given pre-emption, we should delete tez scratch dir only if unregister is - // successful - boolean deleteTezScratchData = this.amConf.getBoolean( - TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, - TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE_DEFAULT); - if (LOG.isDebugEnabled()) { - LOG.debug("Checking whether tez scratch data dir should be deleted, deleteTezScratchData=" - + deleteTezScratchData); - } - if (deleteTezScratchData && this.taskSchedulerManager != null - && this.taskSchedulerManager.hasUnregistered()) { - // Delete tez scratch data dir - if (this.tezSystemStagingDir != null) { - try { - this.appMasterUgi.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - FileSystem fs = tezSystemStagingDir.getFileSystem(amConf); - boolean deletedStagingDir = fs.delete(tezSystemStagingDir, true); - if (!deletedStagingDir) { - LOG.warn("Failed to delete tez scratch data dir, path=" - + tezSystemStagingDir); - } else { - LOG.info("Completed deletion of tez scratch data dir, path=" - + tezSystemStagingDir); - } - return null; + if (this.dagSubmissionTimer != null) { + this.dagSubmissionTimer.cancel(); + } + if (this.clientAMHeartBeatTimeoutService != null) { + this.clientAMHeartBeatTimeoutService.shutdownNow(); + } + // release all the held containers before stop services TEZ-2687 + initiateStop(); + stopServices(); + + // Given pre-emption, we should delete tez scratch dir only if unregister is + // successful + boolean deleteTezScratchData = this.amConf.getBoolean( + TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, + TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE_DEFAULT); + LOG.debug("Checking whether tez scratch data dir should be deleted, deleteTezScratchData={}", + deleteTezScratchData); + if (deleteTezScratchData && this.taskSchedulerManager != null + && this.taskSchedulerManager.hasUnregistered()) { + // Delete tez scratch data dir + if (this.tezSystemStagingDir != null) { + try { + this.appMasterUgi.doAs(new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + FileSystem fs = tezSystemStagingDir.getFileSystem(amConf); + boolean deletedStagingDir = fs.delete(tezSystemStagingDir, true); + if (!deletedStagingDir) { + LOG.warn("Failed to delete tez scratch data dir, path=" + + tezSystemStagingDir); + } else { + LOG.info("Completed deletion of tez scratch data dir, path=" + + tezSystemStagingDir); } - }); - } catch (IOException e) { - // Best effort to delete tez scratch data dir - LOG.warn("Failed to delete tez scratch data dir", e); - } + return null; + } + }); + } catch (IOException e) { + // Best effort to delete tez scratch data dir + LOG.warn("Failed to delete tez scratch data dir", e); } } + } - if (execService != null) { - execService.shutdownNow(); - } + if (execService != null) { + 
execService.shutdownNow(); + } - super.serviceStop(); + // Try to shut down any hooks that are still active + for (TezDAGHook hook : hooks) { + hook.stop(); } + + super.serviceStop(); } private class DagEventDispatcher implements EventHandler { @@ -2244,9 +2242,9 @@ private class DagEventDispatcher implements EventHandler { @Override public void handle(DAGEvent event) { DAG dag = context.getCurrentDAG(); - int eventDagIndex = event.getDAGId().getId(); + int eventDagIndex = event.getDAGID().getId(); if (dag == null || eventDagIndex != dag.getID().getId()) { - return; // event not relevant any more + return; // event not relevant anymore } ((EventHandler)dag).handle(event); } @@ -2257,18 +2255,18 @@ private class TaskEventDispatcher implements EventHandler { @Override public void handle(TaskEvent event) { DAG dag = context.getCurrentDAG(); - int eventDagIndex = - event.getTaskID().getVertexID().getDAGId().getId(); + int eventDagIndex = + event.getDAGID().getId(); if (dag == null || eventDagIndex != dag.getID().getId()) { - return; // event not relevant any more + return; // event not relevant anymore } Task task = - dag.getVertex(event.getTaskID().getVertexID()). + dag.getVertex(event.getVertexID()). getTask(event.getTaskID()); ((EventHandler)task).handle(event); } } - + private class SpeculatorEventHandler implements EventHandler { @Override public void handle(SpeculatorEvent event) { @@ -2287,14 +2285,14 @@ private class TaskAttemptEventDispatcher @Override public void handle(TaskAttemptEvent event) { DAG dag = context.getCurrentDAG(); - int eventDagIndex = - event.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getId(); + int eventDagIndex = + event.getDAGID().getId(); if (dag == null || eventDagIndex != dag.getID().getId()) { - return; // event not relevant any more + return; // event not relevant anymore } Task task = - dag.getVertex(event.getTaskAttemptID().getTaskID().getVertexID()). - getTask(event.getTaskAttemptID().getTaskID()); + dag.getVertex(event.getVertexID()). 
+ getTask(event.getTaskID()); TaskAttempt attempt = task.getAttempt(event.getTaskAttemptID()); ((EventHandler) attempt).handle(event); } @@ -2306,27 +2304,18 @@ private class VertexEventDispatcher @Override public void handle(VertexEvent event) { DAG dag = context.getCurrentDAG(); - int eventDagIndex = - event.getVertexId().getDAGId().getId(); + int eventDagIndex = + event.getDAGID().getId(); if (dag == null || eventDagIndex != dag.getID().getId()) { - return; // event not relevant any more + return; // event not relevant anymore } - + Vertex vertex = - dag.getVertex(event.getVertexId()); + dag.getVertex(event.getVertexID()); ((EventHandler) vertex).handle(event); } } - private static void validateInputParam(String value, String param) - throws IOException { - if (value == null) { - String msg = param + " is null"; - LOG.error(msg); - throw new IOException(msg); - } - } - private long checkAndHandleDAGClientTimeout() throws TezException { if (EnumSet.of(DAGAppMasterState.NEW, DAGAppMasterState.RECOVERING).contains(this.state) || sessionStopped.get()) { @@ -2378,6 +2367,8 @@ public boolean isSession() { public static void main(String[] args) { try { + // Install the tez class loader, which can be used to add new resources + TezClassLoader.setupTezClassLoader(); Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler()); final String pid = System.getenv().get("JVM_PID"); String containerIdStr = @@ -2393,8 +2384,8 @@ public static void main(String[] args) { clientVersion = VersionInfo.UNKNOWN; } - validateInputParam(appSubmitTimeStr, - ApplicationConstants.APP_SUBMIT_TIME_ENV); + Objects.requireNonNull(appSubmitTimeStr, + ApplicationConstants.APP_SUBMIT_TIME_ENV + " is null"); ContainerId containerId = ConverterUtils.toContainerId(containerIdStr); ApplicationAttemptId applicationAttemptId = @@ -2424,8 +2415,7 @@ public static void main(String[] args) { + ", localDirs=" + System.getenv(Environment.LOCAL_DIRS.name()) + ", logDirs=" + System.getenv(Environment.LOG_DIRS.name())); - // TODO Does this really need to be a YarnConfiguration ? - Configuration conf = new Configuration(new YarnConfiguration()); + Configuration conf = new Configuration(); ConfigurationProto confProto = TezUtilsInternal.readUserSpecifiedTezConfiguration(System.getenv(Environment.PWD.name())); @@ -2479,9 +2469,7 @@ static class DAGAppMasterShutdownHook implements Runnable { public void run() { LOG.info("DAGAppMasterShutdownHook invoked"); if(appMaster.getServiceState() == STATE.STOPPED) { - if(LOG.isDebugEnabled()) { - LOG.debug("DAGAppMaster already stopped. Ignoring signal"); - } + LOG.debug("DAGAppMaster already stopped. 
Ignoring signal"); synchronized (appMaster.shutdownHandlerRunning) { try { if (appMaster.shutdownHandlerRunning.get()) { @@ -2516,23 +2504,30 @@ public void run() { } } - private void startDAG() throws IOException, TezException { + private boolean hasConcurrentEdge(DAGPlan dagPlan) { + boolean hasConcurrentEdge = false; + for (DAGProtos.EdgePlan edge : dagPlan.getEdgeList()) { + if (DAGProtos.PlanEdgeSchedulingType.CONCURRENT.equals(edge.getSchedulingType())) { + return true; + } + } + return hasConcurrentEdge; + } + + private DAGPlan readDAGPlanFile() throws IOException, TezException { FileInputStream dagPBBinaryStream = null; + DAGPlan dagPlan = null; try { - DAGPlan dagPlan = null; - // Read the protobuf DAG dagPBBinaryStream = new FileInputStream(new File(workingDirectory, TezConstants.TEZ_PB_PLAN_BINARY_NAME)); dagPlan = DAGPlan.parseFrom(dagPBBinaryStream); - - startDAG(dagPlan, null); - } finally { if (dagPBBinaryStream != null) { dagPBBinaryStream.close(); } } + return dagPlan; } private void startDAG(DAGPlan dagPlan, Map additionalAMResources) @@ -2542,7 +2537,9 @@ private void startDAG(DAGPlan dagPlan, Map additionalAMRe // /////////////////// Create the job itself. final DAG newDAG = createDAG(dagPlan); - _updateLoggers(newDAG, ""); + LoggingUtils.initLoggingContext(mdcContext, newDAG.getConf(), newDAG.getID().toString(), null); + + updateLoggers(newDAG, ""); if (LOG.isDebugEnabled()) { LOG.debug("Running a DAG with " + dagPlan.getVertexCount() + " vertices "); @@ -2593,14 +2590,28 @@ public Void run() throws Exception { throw new TezUncheckedException(e); } + countHeldContainers(newDAG); startDAGExecution(newDAG, lrDiff); // set state after curDag is set this.state = DAGAppMasterState.RUNNING; } + private void countHeldContainers(DAG newDAG) { + newDAG.setDagCounter(DAGCounter.INITIAL_HELD_CONTAINERS, + taskSchedulerManager.getHeldContainersCount()); + } + private void startDAGExecution(DAG dag, final Map additionalAmResources) throws TezException { currentDAG = dag; + final Configuration conf = dag.getConf(); + final String[] hookClasses = conf.getStrings(TezConfiguration.TEZ_AM_HOOKS, new String[0]); + hooks = new TezDAGHook[hookClasses.length]; + for (int i = 0; i < hooks.length; i++) { + hooks[i] = ReflectionUtils.createClazzInstance(hookClasses[i]); + hooks[i].start(dag.getID(), conf); + } + // Try localizing the actual resources. List additionalUrlsForClasspath; try { @@ -2630,6 +2641,8 @@ public List run() throws Exception { // job-init to be done completely here. dagEventDispatcher.handle(initDagEvent); + dag.onStart(); + // All components have started, start the job. /** create a job-start event to get this ball rolling */ DAGEvent startDagEvent = new DAGEventStartDag(currentDAG.getID(), additionalUrlsForClasspath); @@ -2680,8 +2693,12 @@ private boolean enableWebUIService() { TezConfiguration.TEZ_AM_WEBSERVICE_ENABLE_DEFAULT); } + public String getWebUIAddress() { + return webUIService == null ? 
null : webUIService.getBaseUrl(); + } + + @VisibleForTesting - static void parseAllPlugins( + public static void parseAllPlugins( List taskSchedulerDescriptors, BiMap taskSchedulerPluginMap, List containerLauncherDescriptors, BiMap containerLauncherPluginMap, List taskCommDescriptors, BiMap taskCommPluginMap, @@ -2801,4 +2818,11 @@ String buildPluginComponentLog(List namedEntityDescriptor return sb.toString(); } + public void vertexComplete(TezVertexID completedVertexID, Set nodesList) { + getContainerLauncherManager().vertexComplete(completedVertexID, jobTokenSecretManager, nodesList); + } + + public void taskAttemptFailed(TezTaskAttemptID attemptID, NodeId nodeId) { + getContainerLauncherManager().taskAttemptFailed(attemptID, jobTokenSecretManager, nodeId); + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMasterReadinessService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMasterReadinessService.java new file mode 100644 index 0000000000..cd7dff0f6b --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMasterReadinessService.java @@ -0,0 +1,83 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.tez.dag.app; + +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.service.AbstractService; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.api.TezException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This is an artificial service to be used in DAGAppMaster, + * which can be added with dependencies that are crucial in order to be + * able to run DAGs. + * + */ +public class DAGAppMasterReadinessService extends AbstractService { + private static final Logger LOG = LoggerFactory.getLogger(DAGAppMasterReadinessService.class); + + private AtomicBoolean ready = new AtomicBoolean(false); + private int timeoutMs; + + public DAGAppMasterReadinessService(String name) { + super(name); + } + + @Override + protected void serviceInit(Configuration conf) throws Exception { + super.serviceInit(conf); + timeoutMs = getConfig().getInt(TezConfiguration.TEZ_AM_READY_FOR_SUBMIT_TIMEOUT_MS, + TezConfiguration.TEZ_AM_READY_FOR_SUBMIT_TIMEOUT_MS_DEFAULT); + if (timeoutMs <= 0) { + throw new TezException( + "timeout <= 0 is not supported for " + TezConfiguration.TEZ_AM_READY_FOR_SUBMIT_TIMEOUT_MS); + } + } + + @Override + protected void serviceStart() throws Exception { + super.serviceStart(); + ready.set(true); + } + + /** + * The waitToBeReady method waits until this service really starts. When serviceStart + * is called and this service is ready, we can be sure that the dependency services + * have already been started too. 
+ * @throws TezException + */ + public void waitToBeReady() throws TezException { + long start = System.currentTimeMillis(); + while (!ready.get()) { + if (System.currentTimeMillis() - start > timeoutMs) { + throw new TezException("App Master is not ready within the configured time period (" + timeoutMs + "ms). " + + "Please check logs for AM service states."); + } + try { + LOG.info("App is not ready yet, waiting 100ms"); + Thread.sleep(100); + } catch (InterruptedException e) { + throw new TezException(e); + } + } + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/HeartbeatHandlerBase.java b/tez-dag/src/main/java/org/apache/tez/dag/app/HeartbeatHandlerBase.java index 5c786b2232..b5ded81500 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/HeartbeatHandlerBase.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/HeartbeatHandlerBase.java @@ -42,7 +42,6 @@ public abstract class HeartbeatHandlerBase extends AbstractService { protected final AppContext appContext; private ConcurrentMap runningMap; - private volatile boolean stopped; public HeartbeatHandlerBase(AppContext appContext, int expectedConcurrency, String name) { super(name); @@ -70,7 +69,6 @@ public void serviceStart() { @Override public void serviceStop() { - stopped = true; if (timeOutCheckerThread != null) { timeOutCheckerThread.interrupt(); } @@ -140,7 +138,7 @@ private class PingChecker implements Runnable { @Override public void run() { - while (!stopped && !Thread.currentThread().isInterrupted()) { + while (!Thread.currentThread().isInterrupted()) { Iterator> iterator = runningMap.entrySet().iterator(); @@ -158,7 +156,7 @@ public void run() { try { Thread.sleep(timeOutCheckInterval); } catch (InterruptedException e) { - break; + Thread.currentThread().interrupt(); } } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/LocalDAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/LocalDAGAppMaster.java new file mode 100644 index 0000000000..e0c8443577 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/LocalDAGAppMaster.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.dag.app; + +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.tez.dag.api.records.DAGProtos.AMPluginDescriptorProto; + +/** + * A DAGAppMaster implementation which is really local in a sense that it doesn't start an RPC + * server for handling dag requests. It is typically used by LocalClient, which already has an + * embedded DAGAppMaster, but by default, it calls RPC methods. 
With + * tez.local.mode.without.network=true, LocalClient will call the DAGAppMaster's methods directly. + */ +public class LocalDAGAppMaster extends DAGAppMaster { + + public LocalDAGAppMaster(ApplicationAttemptId applicationAttemptId, ContainerId containerId, + String nmHost, int nmPort, int nmHttpPort, Clock clock, long appSubmitTime, boolean isSession, + String workingDirectory, String[] localDirs, String[] logDirs, String clientVersion, + Credentials credentials, String jobUserName, AMPluginDescriptorProto pluginDescriptorProto) { + super(applicationAttemptId, containerId, nmHost, nmPort, nmHttpPort, clock, appSubmitTime, + isSession, workingDirectory, localDirs, logDirs, clientVersion, credentials, jobUserName, + pluginDescriptorProto); + } + + @Override + protected void initClientRpcServer() { + // nothing to do, in case of LocalDAGAppMaster clientRpcServer is not supposed to be used by clients + } + + public int getRpcPort() { + return 0; + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/RecoveryParser.java b/tez-dag/src/main/java/org/apache/tez/dag/app/RecoveryParser.java index 368dd17f47..0f40700cf3 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/RecoveryParser.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/RecoveryParser.java @@ -19,6 +19,7 @@ package org.apache.tez.dag.app; import java.io.EOFException; +import java.io.FileNotFoundException; import java.io.IOException; import java.net.URL; import java.util.ArrayList; @@ -27,6 +28,7 @@ import java.util.Map; import java.util.Map.Entry; +import com.google.protobuf.CodedInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -34,6 +36,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.functional.FutureIO; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.tez.common.TezCommonUtils; @@ -73,15 +76,14 @@ import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.dag.recovery.records.RecoveryProtos; import org.apache.tez.dag.recovery.records.RecoveryProtos.SummaryEventProto; -import org.apache.tez.runtime.api.impl.TezEvent; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; /** * RecoverParser is mainly for Tez AM Recovery. It would read the recovery events. (summary & non-summary) - * + * */ public class RecoveryParser { @@ -150,7 +152,7 @@ public DAGRecoveryData(DAGSummaryData dagSummaryData) { // DAG is not recoverable if vertex has committer and has completed the commit (based on summary recovery events) // but its full recovery events are not seen. (based on non-summary recovery events) - // Unrecoverable reason: vertex is committed we cannot rerun it and if vertex recovery events are not completed + // Unrecoverable reason: vertex is committed we cannot rerun it and if vertex recovery events are not completed // we cannot run other vertices that may depend on this one. So we have to abort. public void checkRecoverableNonSummary() { // It is OK without full recovering events if the dag is completed based on summary event. 
@@ -251,11 +253,15 @@ private static void parseSummaryFile(FSDataInputStream inputStream) } } - private static HistoryEvent getNextEvent(FSDataInputStream inputStream) + private static HistoryEvent getNextEvent(CodedInputStream inputStream) throws IOException { + boolean isAtEnd = inputStream.isAtEnd(); + if (isAtEnd) { + return null; + } int eventTypeOrdinal = -1; try { - eventTypeOrdinal = inputStream.readInt(); + eventTypeOrdinal = inputStream.readFixed32(); } catch (EOFException eof) { return null; } @@ -342,24 +348,23 @@ private static HistoryEvent getNextEvent(FSDataInputStream inputStream) } catch (EOFException eof) { return null; } - if (LOG.isDebugEnabled()) { - LOG.debug("Parsed event from input stream" - + ", eventType=" + eventType - + ", event=" + event.toString()); - } + LOG.debug("Parsed event from input stream, eventType={}, event={}", + eventType, event); return event; } public static List parseDAGRecoveryFile(FSDataInputStream inputStream) throws IOException { List historyEvents = new ArrayList(); + CodedInputStream codedInputStream = CodedInputStream.newInstance(inputStream); + codedInputStream.setSizeLimit(Integer.MAX_VALUE); while (true) { - HistoryEvent historyEvent = getNextEvent(inputStream); + HistoryEvent historyEvent = getNextEvent(codedInputStream); if (historyEvent == null) { LOG.info("Reached end of stream"); break; } - LOG.debug("Read HistoryEvent, eventType=" + historyEvent.getEventType() + ", event=" + historyEvent); + LOG.debug("Read HistoryEvent, eventType={}, event={}", historyEvent.getEventType(), historyEvent); historyEvents.add(historyEvent); } return historyEvents; @@ -380,18 +385,18 @@ public static List readRecoveryEvents(TezConfiguration tezConf, Ap new Path(currentAttemptRecoveryDataDir, appId.toString().replace( "application", "dag") + "_1" + TezConstants.DAG_RECOVERY_RECOVER_FILE_SUFFIX); - if (fs.exists(recoveryFilePath)) { - LOG.info("Read recovery file:" + recoveryFilePath); - FSDataInputStream in = null; - try { - in = fs.open(recoveryFilePath); - historyEvents.addAll(RecoveryParser.parseDAGRecoveryFile(in)); - } catch (IOException e) { - throw e; - } finally { - if (in != null) { - in.close(); - } + LOG.info("Read recovery file:" + recoveryFilePath); + FSDataInputStream in = null; + try { + in = fs.open(recoveryFilePath); + historyEvents.addAll(RecoveryParser.parseDAGRecoveryFile(in)); + } catch (FileNotFoundException fnf) { + // Ignore, the file doesn't exist + } catch (IOException e) { + throw e; + } finally { + if (in != null) { + in.close(); } } } @@ -422,16 +427,16 @@ public static void main(String argv[]) throws IOException { } } - private Path getSummaryPath(Path attemptRrecoveryDataDir) { - return TezCommonUtils.getSummaryRecoveryPath(attemptRrecoveryDataDir); + private Path getSummaryPath(Path attemptRecoveryDataDir) { + return TezCommonUtils.getSummaryRecoveryPath(attemptRecoveryDataDir); } - private FSDataInputStream getSummaryStream(Path summaryPath) - throws IOException { - if (!recoveryFS.exists(summaryPath)) { + private FSDataInputStream getSummaryStream(Path summaryPath, FileStatus summaryFileStatus) throws IOException { + try { + return FutureIO.awaitFuture(recoveryFS.openFile(summaryPath).withFileStatus(summaryFileStatus).build()); + } catch (FileNotFoundException fnf) { return null; } - return recoveryFS.open(summaryPath, recoveryBufferSize); } private Path getDAGRecoveryFilePath(Path recoveryDataDir, @@ -641,7 +646,7 @@ private List getDAGRecoveryFiles(TezDAGID dagId) throws IOException { /** * 1. 
Read Summary Recovery file and build DAGSummaryData - * Check whether it is recoverable based on the summary file (whether dag is + * Check whether it is recoverable based on the summary file (whether dag is * in the middle of committing) * 2. Read the non-Summary Recovery file and build DAGRecoveryData * Check whether it is recoverable based on both the summary file and non-summary file @@ -662,7 +667,7 @@ public DAGRecoveryData parseRecoveryData() throws IOException { + ", len=" + summaryFileStatus.getLen() + ", lastModTime=" + summaryFileStatus.getModificationTime()); FSDataInputStream summaryStream = getSummaryStream( - summaryFile); + summaryFile, summaryFileStatus); while (true) { RecoveryProtos.SummaryEventProto proto; try { @@ -738,17 +743,21 @@ public DAGRecoveryData parseRecoveryData() throws IOException { + lastRecoveryFile); break; } - FileStatus fileStatus = recoveryFS.getFileStatus(dagRecoveryFile); lastRecoveryFile = dagRecoveryFile; - LOG.info("Trying to recover dag from recovery file" - + ", dagId=" + lastInProgressDAG.toString() - + ", dagRecoveryFile=" + dagRecoveryFile - + ", len=" + fileStatus.getLen()); + LOG.info("Trying to recover dag from recovery file, dagId={}, dagRecoveryFile={}", lastInProgressDAG, + dagRecoveryFile); + if (LOG.isDebugEnabled()) { + FileStatus fileStatus = recoveryFS.getFileStatus(dagRecoveryFile); + LOG.debug("Recovery file details: {}", fileStatus); + } + FSDataInputStream dagRecoveryStream = recoveryFS.open(dagRecoveryFile, recoveryBufferSize); + CodedInputStream codedInputStream = CodedInputStream.newInstance(dagRecoveryStream); + codedInputStream.setSizeLimit(Integer.MAX_VALUE); while (true) { HistoryEvent event; try { - event = getNextEvent(dagRecoveryStream); + event = getNextEvent(codedInputStream); if (event == null) { LOG.info("Reached end of dag recovery stream"); break; @@ -791,10 +800,10 @@ public DAGRecoveryData parseRecoveryData() throws IOException { case DAG_FINISHED: recoveredDAGData.dagFinishedEvent = (DAGFinishedEvent)event; skipAllOtherEvents = true; - break; + break; case DAG_COMMIT_STARTED: case VERTEX_GROUP_COMMIT_STARTED: - case VERTEX_GROUP_COMMIT_FINISHED: + case VERTEX_GROUP_COMMIT_FINISHED: case CONTAINER_LAUNCHED: { // Nothing to do for now @@ -841,9 +850,9 @@ public DAGRecoveryData parseRecoveryData() throws IOException { case TASK_STARTED: { TaskStartedEvent taskStartedEvent = (TaskStartedEvent) event; - VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taskStartedEvent.getTaskID().getVertexID()); + VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taskStartedEvent.getVertexID()); Preconditions.checkArgument(vertexRecoveryData != null, - "Invalid TaskStartedEvent, its vertex does not exist:" + taskStartedEvent.getTaskID().getVertexID()); + "Invalid TaskStartedEvent, its vertex does not exist:" + taskStartedEvent.getVertexID()); TaskRecoveryData taskRecoveryData = vertexRecoveryData.maybeCreateTaskRecoveryData(taskStartedEvent.getTaskID()); taskRecoveryData.taskStartedEvent = taskStartedEvent; break; @@ -851,9 +860,9 @@ public DAGRecoveryData parseRecoveryData() throws IOException { case TASK_FINISHED: { TaskFinishedEvent taskFinishedEvent = (TaskFinishedEvent) event; - VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taskFinishedEvent.getTaskID().getVertexID()); + VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taskFinishedEvent.getVertexID()); 
Preconditions.checkArgument(vertexRecoveryData != null, - "Invalid TaskFinishedEvent, its vertex does not exist:" + taskFinishedEvent.getTaskID().getVertexID()); + "Invalid TaskFinishedEvent, its vertex does not exist:" + taskFinishedEvent.getVertexID()); TaskRecoveryData taskRecoveryData = vertexRecoveryData.maybeCreateTaskRecoveryData(taskFinishedEvent.getTaskID()); taskRecoveryData.taskFinishedEvent = taskFinishedEvent; break; @@ -862,7 +871,7 @@ public DAGRecoveryData parseRecoveryData() throws IOException { { TaskAttemptStartedEvent taStartedEvent = (TaskAttemptStartedEvent)event; VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get( - taStartedEvent.getTaskAttemptID().getTaskID().getVertexID()); + taStartedEvent.getVertexID()); Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskAttemptStartedEvent, its vertexId does not exist, taId=" + taStartedEvent.getTaskAttemptID()); TaskRecoveryData taskRecoveryData = vertexRecoveryData.taskRecoveryDataMap @@ -877,7 +886,7 @@ public DAGRecoveryData parseRecoveryData() throws IOException { { TaskAttemptFinishedEvent taFinishedEvent = (TaskAttemptFinishedEvent)event; VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get( - taFinishedEvent.getTaskAttemptID().getTaskID().getVertexID()); + taFinishedEvent.getVertexID()); Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskAttemtFinishedEvent, its vertexId does not exist, taId=" + taFinishedEvent.getTaskAttemptID()); TaskRecoveryData taskRecoveryData = vertexRecoveryData.taskRecoveryDataMap @@ -913,27 +922,27 @@ public static class VertexRecoveryData { private VertexFinishedEvent vertexFinishedEvent; private Map taskRecoveryDataMap = new HashMap(); - private boolean commited; + private boolean committed; @VisibleForTesting public VertexRecoveryData(VertexInitializedEvent vertexInitedEvent, VertexConfigurationDoneEvent vertexReconfigureDoneEvent, VertexStartedEvent vertexStartedEvent, VertexFinishedEvent vertexFinishedEvent, - Map taskRecoveryDataMap, boolean commited) { + Map taskRecoveryDataMap, boolean committed) { super(); this.vertexInitedEvent = vertexInitedEvent; this.vertexConfigurationDoneEvent = vertexReconfigureDoneEvent; this.vertexStartedEvent = vertexStartedEvent; this.vertexFinishedEvent = vertexFinishedEvent; this.taskRecoveryDataMap = taskRecoveryDataMap; - this.commited = commited; + this.committed = committed; } public VertexRecoveryData(boolean committed) { - this.commited = committed; + this.committed = committed; } - + public VertexInitializedEvent getVertexInitedEvent() { return vertexInitedEvent; } @@ -962,6 +971,10 @@ public boolean shouldSkipInit() { return vertexInitedEvent != null && vertexConfigurationDoneEvent != null; } + public boolean isVertexTasksStarted() { + return taskRecoveryDataMap != null && !taskRecoveryDataMap.isEmpty(); + } + public boolean isVertexStarted() { return vertexStartedEvent != null; } @@ -978,7 +991,7 @@ public boolean isVertexFinished() { } public boolean isVertexCommitted() { - return this.commited; + return this.committed; } public TaskRecoveryData getTaskRecoveryData(TezTaskID taskId) { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java index 1adbf6edf8..1e2671f5b4 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java +++ 
b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorContextImpl.java @@ -19,9 +19,11 @@ import java.io.IOException; import java.util.Set; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.Objects; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Iterables; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.security.Credentials; @@ -48,7 +50,7 @@ @InterfaceAudience.Private public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, VertexStateUpdateListener { - // TODO TEZ-2003 (post) TEZ-2669 Propagate errors baack to the AM with proper error reporting + // TODO TEZ-2003 (post) TEZ-2669 Propagate errors back to the AM with proper error reporting private final AppContext context; private final TaskCommunicatorManager taskCommunicatorManager; @@ -57,7 +59,8 @@ public class TaskCommunicatorContextImpl implements TaskCommunicatorContext, Ver private final ReentrantReadWriteLock.WriteLock dagChangedWriteLock; private final UserPayload userPayload; - private DAG dag; + @VisibleForTesting + DAG dag; public TaskCommunicatorContextImpl(AppContext appContext, TaskCommunicatorManager taskCommunicatorManager, @@ -155,7 +158,7 @@ public void taskFailed(TezTaskAttemptID taskAttemptId, TaskFailureType taskFailu @Override public void registerForVertexStateUpdates(String vertexName, @Nullable Set stateSet) { - Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName); + Objects.requireNonNull(vertexName, "VertexName cannot be null: " + vertexName); DAG dag = getDag(); dag.getStateChangeNotifier().registerForVertexUpdates(vertexName, stateSet, this); @@ -174,7 +177,7 @@ public DagInfo getCurrentDagInfo() { @Override public Iterable getInputVertexNames(String vertexName) { - Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName); + Objects.requireNonNull(vertexName, "VertexName cannot be null: " + vertexName); DAG dag = getDag(); Vertex vertex = dag.getVertex(vertexName); Set sources = vertex.getInputVertices().keySet(); @@ -188,7 +191,7 @@ public String apply(Vertex input) { @Override public int getVertexTotalTaskCount(String vertexName) { - Preconditions.checkArgument(vertexName != null, "VertexName must be specified"); + Objects.requireNonNull(vertexName, "VertexName must be specified"); DAG dag = getDag(); Vertex vertex = dag.getVertex(vertexName); return vertex.getTotalTasks(); @@ -196,7 +199,7 @@ public int getVertexTotalTaskCount(String vertexName) { @Override public int getVertexCompletedTaskCount(String vertexName) { - Preconditions.checkArgument(vertexName != null, "VertexName must be specified"); + Objects.requireNonNull(vertexName, "VertexName must be specified"); DAG dag = getDag(); Vertex vertex = dag.getVertex(vertexName); return vertex.getCompletedTasks(); @@ -204,7 +207,7 @@ public int getVertexCompletedTaskCount(String vertexName) { @Override public int getVertexRunningTaskCount(String vertexName) { - Preconditions.checkArgument(vertexName != null, "VertexName must be specified"); + Objects.requireNonNull(vertexName, "VertexName must be specified"); DAG dag = getDag(); Vertex vertex = dag.getVertex(vertexName); return vertex.getRunningTasks(); @@ -212,7 +215,7 @@ public int getVertexRunningTaskCount(String vertexName) { @Override public long getFirstAttemptStartTime(String vertexName, 
int taskIndex) { - Preconditions.checkArgument(vertexName != null, "VertexName must be specified"); + Objects.requireNonNull(vertexName, "VertexName must be specified"); Preconditions.checkArgument(taskIndex >=0, "TaskIndex must be > 0"); DAG dag = getDag(); Vertex vertex = dag.getVertex(vertexName); @@ -227,7 +230,7 @@ public long getDagStartTime() { @Override public void reportError(@Nonnull ServicePluginError servicePluginError, String message, DagInfo dagInfo) { - Preconditions.checkNotNull(servicePluginError, "ServicePluginError must be set"); + Objects.requireNonNull(servicePluginError, "ServicePluginError must be set"); taskCommunicatorManager.reportError(taskCommunicatorIndex, servicePluginError, message, dagInfo); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManager.java index d1b0349eec..6846d342ec 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManager.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManager.java @@ -27,7 +27,7 @@ import java.util.concurrent.ConcurrentMap; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.apache.commons.collections4.ListUtils; import org.apache.hadoop.yarn.event.Event; @@ -202,8 +202,7 @@ TaskCommunicator createUberTaskCommunicator(TaskCommunicatorContext taskCommunic TaskCommunicator createCustomTaskCommunicator(TaskCommunicatorContext taskCommunicatorContext, NamedEntityDescriptor taskCommDescriptor) throws TezException { - LOG.info("Creating TaskCommunicator {}:{} " + taskCommDescriptor.getEntityName(), - taskCommDescriptor.getClassName()); + LOG.info("Creating TaskCommunicator {}:{} ", taskCommDescriptor.getEntityName(), taskCommDescriptor.getClassName()); Class taskCommClazz = (Class) ReflectionUtils .getClazz(taskCommDescriptor.getClassName()); @@ -220,10 +219,7 @@ public TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request) throws IOException, TezException { ContainerId containerId = ConverterUtils.toContainerId(request .getContainerIdentifier()); - if (LOG.isDebugEnabled()) { - LOG.debug("Received heartbeat from container" - + ", request=" + request); - } + LOG.debug("Received heartbeat from container, request={}", request); if (!registeredContainers.containsKey(containerId)) { LOG.warn("Received task heartbeat from unknown container with id: " + containerId + @@ -353,14 +349,14 @@ public TaskHeartbeatResponse heartbeat(TaskHeartbeatRequest request) } } if (!eventsForVertex.isEmpty()) { - TezVertexID vertexId = taskAttemptID.getTaskID().getVertexID(); + TezVertexID vertexId = taskAttemptID.getVertexID(); sendEvent( new VertexEventRouteEvent(vertexId, Collections.unmodifiableList(eventsForVertex))); } taskHeartbeatHandler.pinged(taskAttemptID); eventInfo = context .getCurrentDAG() - .getVertex(taskAttemptID.getTaskID().getVertexID()) + .getVertex(taskAttemptID.getVertexID()) .getTaskAttemptTezEvents(taskAttemptID, request.getStartIndex(), request.getPreRoutedStartIndex(), request.getMaxEvents()); } @@ -390,7 +386,7 @@ public void taskKilled(TezTaskAttemptID taskAttemptId, TaskAttemptEndReason task // and messages from the scheduler will release the container. // TODO TEZ-2003 (post) TEZ-2671 Maybe consider un-registering here itself, since the task is not active anymore, // instead of waiting for the unregister to flow through the Container. 
- // Fix along the same lines as TEZ-2124 by introducing an explict context. + // Fix along the same lines as TEZ-2124 by introducing an explicit context. sendEvent(new TaskAttemptEventAttemptKilled(taskAttemptId, diagnostics, TezUtilsInternal.fromTaskAttemptEndReason( taskAttemptEndReason))); @@ -403,7 +399,7 @@ public void taskFailed(TezTaskAttemptID taskAttemptId, TaskFailureType taskFailu // and messages from the scheduler will release the container. // TODO TEZ-2003 (post) TEZ-2671 Maybe consider un-registering here itself, since the task is not active anymore, // instead of waiting for the unregister to flow through the Container. - // Fix along the same lines as TEZ-2124 by introducing an explict context. + // Fix along the same lines as TEZ-2124 by introducing an explicit context. //TODO-3183. Allow the FailureType to be specified sendEvent(new TaskAttemptEventAttemptFailed(taskAttemptId, TaskAttemptEventType.TA_FAILED, taskFailureType, diagnostics, TezUtilsInternal.fromTaskAttemptEndReason( @@ -445,7 +441,7 @@ public boolean canCommit(TezTaskAttemptID taskAttemptId) throws IOException { DAG job = context.getCurrentDAG(); Task task = - job.getVertex(taskAttemptId.getTaskID().getVertexID()). + job.getVertex(taskAttemptId.getVertexID()). getTask(taskAttemptId.getTaskID()); return task.canCommit(taskAttemptId); } @@ -488,9 +484,7 @@ public void dagSubmitted() { @Override public void registerRunningContainer(ContainerId containerId, int taskCommId) { - if (LOG.isDebugEnabled()) { - LOG.debug("ContainerId: " + containerId + " registered with TaskAttemptListener"); - } + LOG.debug("ContainerId: {} registered with TaskAttemptListener", containerId); ContainerInfo oldInfo = registeredContainers.put(containerId, NULL_CONTAINER_INFO); if (oldInfo != null) { throw new TezUncheckedException( @@ -515,9 +509,7 @@ public void registerRunningContainer(ContainerId containerId, int taskCommId) { @Override public void unregisterRunningContainer(ContainerId containerId, int taskCommId, ContainerEndReason endReason, String diagnostics) { - if (LOG.isDebugEnabled()) { - LOG.debug("Unregistering Container from TaskAttemptListener: " + containerId); - } + LOG.debug("Unregistering Container from TaskAttemptListener: {}", containerId); ContainerInfo containerInfo = registeredContainers.remove(containerId); if (containerInfo.taskAttemptId != null) { registeredAttempts.remove(containerInfo.taskAttemptId); @@ -685,4 +677,12 @@ public String getCompletedLogsUrl(int taskCommId, TezTaskAttemptID attemptID, No return null; } + @Override + public long getTotalUsedMemory() { + long totalUsedMemory = 0; + for (int i = 0; i < taskCommunicators.length; i++) { + totalUsedMemory += taskCommunicators[i].getTaskCommunicator().getTotalUsedMemory(); + } + return totalUsedMemory; + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManagerInterface.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManagerInterface.java index 254e74c734..150977a94e 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManagerInterface.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TaskCommunicatorManagerInterface.java @@ -54,4 +54,5 @@ public interface TaskCommunicatorManagerInterface { String getCompletedLogsUrl(int taskCommId, TezTaskAttemptID attemptID, NodeId containerNodeId); + long getTotalUsedMemory(); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java 
b/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java index 15d90d3832..b5749591c1 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezLocalTaskCommunicatorImpl.java @@ -39,8 +39,6 @@ protected void startRpcServer() { } catch (UnknownHostException e) { throw new TezUncheckedException(e); } - if (LOG.isDebugEnabled()) { - LOG.debug("Not starting TaskAttemptListener RPC in LocalMode"); - } + LOG.debug("Not starting TaskAttemptListener RPC in LocalMode"); } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java index 9b700f83df..48aee3103b 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/TezTaskCommunicatorImpl.java @@ -20,8 +20,8 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import java.util.Objects; -import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -104,6 +104,7 @@ public static final class ContainerInfo { Credentials credentials = null; boolean credentialsChanged = false; boolean taskPulled = false; + long usedMemory = 0; void reset() { taskSpec = null; @@ -221,9 +222,9 @@ public void registerRunningTaskAttempt(ContainerId containerId, TaskSpec taskSpe int priority) { ContainerInfo containerInfo = registeredContainers.get(containerId); - Preconditions.checkNotNull(containerInfo, - "Cannot register task attempt: " + taskSpec.getTaskAttemptID() + " to unknown container: " + - containerId); + Objects.requireNonNull(containerInfo, + String.format("Cannot register task attempt %s to unknown container %s", + taskSpec.getTaskAttemptID(), containerId)); synchronized (containerInfo) { if (containerInfo.taskSpec != null) { throw new TezUncheckedException( @@ -310,18 +311,14 @@ public ContainerTask getTask(ContainerContext containerContext) throws IOExcepti } else { ContainerId containerId = ConverterUtils.toContainerId(containerContext .getContainerIdentifier()); - if (LOG.isDebugEnabled()) { - LOG.debug("Container with id: " + containerId + " asked for a task"); - } + LOG.debug("Container with id: {} asked for a task", containerId); task = getContainerTask(containerId); if (task != null && !task.shouldDie()) { getContext().taskSubmitted(task.getTaskSpec().getTaskAttemptID(), containerId); getContext().taskStartedRemotely(task.getTaskSpec().getTaskAttemptID()); } } - if (LOG.isDebugEnabled()) { - LOG.debug("getTask returning task: " + task); - } + LOG.debug("getTask returning task: {}", task); return task; } @@ -335,10 +332,7 @@ public TezHeartbeatResponse heartbeat(TezHeartbeatRequest request) throws IOExce TezException { ContainerId containerId = ConverterUtils.toContainerId(request.getContainerIdentifier()); long requestId = request.getRequestId(); - if (LOG.isDebugEnabled()) { - LOG.debug("Received heartbeat from container" - + ", request=" + request); - } + LOG.debug("Received heartbeat from container, request={}", request); ContainerInfo containerInfo = registeredContainers.get(containerId); if (containerInfo == null) { @@ -389,6 +383,7 @@ public TezHeartbeatResponse heartbeat(TezHeartbeatRequest request) throws IOExce response.setLastRequestId(requestId); 
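+      // The fields below record the last heartbeat exchange; usedMemory is the
+      // memory figure the container reported in this heartbeat, which
+      // getTotalUsedMemory() sums across all registered containers.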
containerInfo.lastRequestId = requestId; containerInfo.lastResponse = response; + containerInfo.usedMemory = request.getUsedMemory(); return response; } @@ -436,9 +431,7 @@ private ContainerTask getContainerTask(ContainerId containerId) throws IOExcepti } } else { task = null; - if (LOG.isDebugEnabled()) { - LOG.debug("No task assigned yet for running container: " + containerId); - } + LOG.debug("No task assigned yet for running container: {}", containerId); } } } @@ -475,4 +468,8 @@ protected ContainerInfo getContainerInfo(ContainerId containerId) { protected ContainerId getContainerForAttempt(TezTaskAttemptID taskAttemptId) { return attemptToContainerMap.get(taskAttemptId); } + + public long getTotalUsedMemory() { + return registeredContainers.values().stream().mapToLong(c -> c.usedMemory).sum(); + } } \ No newline at end of file diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/ThreadDumpDAGHook.java b/tez-dag/src/main/java/org/apache/tez/dag/app/ThreadDumpDAGHook.java new file mode 100644 index 0000000000..ff657e47f1 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/ThreadDumpDAGHook.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.app; + +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.runtime.TezThreadDumpHelper; +import org.apache.tez.runtime.hook.TezDAGHook; + +/** + * A DAG hook which dumps thread information periodically. 
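+ * It is instantiated reflectively: adding this class name to the comma-separated
+ * list configured via TezConfiguration.TEZ_AM_HOOKS enables it (see the hook
+ * wiring in DAGAppMaster#startDAGExecution).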
+ */ +public class ThreadDumpDAGHook implements TezDAGHook { + private TezThreadDumpHelper helper; + + @Override + public void start(TezDAGID id, Configuration conf) { + helper = TezThreadDumpHelper.getInstance(conf).start(id.toString()); + } + + @Override + public void stop() { + helper.stop(); + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java index 6c8e8f9b57..c828d81b2b 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAG.java @@ -22,11 +22,11 @@ import java.util.Map; import java.util.Set; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.tez.common.counters.DAGCounter; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.api.client.DAGStatusBuilder; @@ -38,6 +38,8 @@ import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.serviceplugins.api.DagInfo; +import javax.annotation.Nullable; + /** * Main interface to interact with the job. */ @@ -68,15 +70,12 @@ public interface DAG extends DagInfo { Map getVertices(); Vertex getVertex(TezVertexID vertexId); List getDiagnostics(); - int getTotalVertices(); int getSuccessfulVertices(); float getProgress(); float getCompletedTaskProgress(); boolean isUber(); String getUserName(); - Configuration getConf(); - DAGPlan getJobPlan(); DAGStatusBuilder getDAGStatus(Set statusOptions); DAGStatusBuilder getDAGStatus(Set statusOptions, long timeout) @@ -98,4 +97,25 @@ VertexStatusBuilder getVertexStatus(String vertexName, org.apache.tez.dag.api.Vertex.VertexExecutionContext getDefaultExecutionContext(); + /** + * + * @return the DAGScheduler that will schedule + * this DAG, null if it doesn't exist + */ + @Nullable DAGScheduler getDAGScheduler(); + + void incrementDagCounter(DAGCounter counter, int incrValue); + void setDagCounter(DAGCounter counter, int setValue); + void addUsedContainer(Container container); + + /** + * Called by the DAGAppMaster when the DAG is started normally or in the event of recovery. + */ + void onStart(); + + /** + * Called by the DAGAppMaster when the DAG is finished, or when there is a currentDAG on AM stop. + * The implementation of this method should be idempotent. 
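+ * (As wired in this patch, stopServices invokes onFinish for a still-current
+ * DAG during AM shutdown, so an implementation may see a second call after the
+ * DAG has already finished.)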
+ */ + void onFinish(); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAGScheduler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAGScheduler.java index 3055cd30da..ada7867a6a 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAGScheduler.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/DAGScheduler.java @@ -52,7 +52,7 @@ public void addVertexConcurrencyLimit(TezVertexID vId, int concurrency) { public void scheduleTask(DAGEventSchedulerUpdate event) { VertexInfo vInfo = null; if (vertexInfo != null) { - vInfo = vertexInfo.get(event.getAttempt().getID().getTaskID().getVertexID()); + vInfo = vertexInfo.get(event.getVertexID()); } scheduleTaskWithLimit(event, vInfo); } @@ -60,7 +60,7 @@ public void scheduleTask(DAGEventSchedulerUpdate event) { private void scheduleTaskWithLimit(DAGEventSchedulerUpdate event, VertexInfo vInfo) { if (vInfo != null) { if (vInfo.concurrency >= vInfo.concurrencyLimit) { - vInfo.pendingAttempts.put(event.getAttempt().getID(), event); + vInfo.pendingAttempts.put(event.getTaskAttemptID(), event); return; // already at max concurrency } vInfo.concurrency++; @@ -71,9 +71,9 @@ private void scheduleTaskWithLimit(DAGEventSchedulerUpdate event, VertexInfo vIn public void taskCompleted(DAGEventSchedulerUpdate event) { taskCompletedEx(event); if (vertexInfo != null) { - VertexInfo vInfo = vertexInfo.get(event.getAttempt().getID().getTaskID().getVertexID()); + VertexInfo vInfo = vertexInfo.get(event.getVertexID()); if (vInfo != null) { - if(vInfo.pendingAttempts.remove(event.getAttempt().getID()) == null) { + if(vInfo.pendingAttempts.remove(event.getTaskAttemptID()) == null) { vInfo.concurrency--; if(!vInfo.pendingAttempts.isEmpty()) { Iterator i = vInfo.pendingAttempts.values().iterator(); @@ -89,4 +89,26 @@ public void taskCompleted(DAGEventSchedulerUpdate event) { public abstract void scheduleTaskEx(DAGEventSchedulerUpdate event); public abstract void taskCompletedEx(DAGEventSchedulerUpdate event); + + /** + * Get the low limit priority for a particular vertex. + * @param vertex to get the priority of + * @return the priority + */ + public int getPriorityLowLimit(final DAG dag, final Vertex vertex) { + final int vertexDistanceFromRoot = vertex.getDistanceFromRoot(); + return ((vertexDistanceFromRoot + 1) * dag.getTotalVertices() * 3) + + (vertex.getVertexId().getId() * 3); + } + + /** + * Get the high limit priority for a particular vertex. Defaults + * to the low limit priority minus two. 
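+ * For example, with dag.getTotalVertices() == 3, a vertex at distance 1 from
+ * the root with id 2 gets a low limit of (1 + 1) * 3 * 3 + 2 * 3 = 24, and
+ * therefore a high limit of 22.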
+ * @param vertex to get the priority of + * @return the priority + */ + public int getPriorityHighLimit(final DAG dag, final Vertex vertex) { + return getPriorityLowLimit(dag, vertex) - 2; + } + } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/RootInputInitializerManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/RootInputInitializerManager.java index e03b469a21..f3e94993ca 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/RootInputInitializerManager.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/RootInputInitializerManager.java @@ -34,8 +34,9 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.Objects; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.LinkedListMultimap; import com.google.common.collect.ListMultimap; import com.google.common.collect.Lists; @@ -45,6 +46,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.tez.common.GuavaShim; import org.apache.tez.common.ReflectionUtils; import org.apache.tez.common.TezUtilsInternal; import org.apache.tez.dag.api.InputDescriptor; @@ -107,7 +109,7 @@ public RootInputInitializerManager(Vertex vertex, AppContext appContext, this.dagUgi = dagUgi; this.entityStateTracker = stateTracker; } - + public void runInputInitializers(List> inputs) throws TezException { for (RootInputLeafOutput input : inputs) { @@ -127,7 +129,8 @@ public void runInputInitializers(List vertexUpdateRegistrations = pendingVertexRegistrations.removeAll(input.getName()); + List vertexUpdateRegistrations = + pendingVertexRegistrations.removeAll(input.getName()); if (vertexUpdateRegistrations != null) { for (VertexUpdateRegistrationHolder h : vertexUpdateRegistrations) { initializerWrapper.registerForVertexStateUpdates(h.vertexName, h.stateSet); @@ -137,7 +140,7 @@ public void runInputInitializers(List> future = executor .submit(new InputInitializerCallable(initializerWrapper, dagUgi, appContext)); - Futures.addCallback(future, createInputInitializerCallback(initializerWrapper)); + Futures.addCallback(future, createInputInitializerCallback(initializerWrapper), GuavaShim.directExecutor()); } } @@ -175,7 +178,7 @@ public void handleInitializerEvents(List events) { InputInitializerEvent event = (InputInitializerEvent)tezEvent.getEvent(); Preconditions.checkState(vertex.getName().equals(event.getTargetVertexName()), "Received event for incorrect vertex"); - Preconditions.checkNotNull(event.getTargetInputName(), "target input name must be set"); + Objects.requireNonNull(event.getTargetInputName(), "target input name must be set"); InitializerWrapper initializer = initializerMap.get(event.getTargetInputName()); Preconditions.checkState(initializer != null, "Received event for unknown input : " + event.getTargetInputName()); @@ -220,8 +223,8 @@ private VertexUpdateRegistrationHolder(String vertexName, Set stateSet) { - Preconditions.checkNotNull(vertexName, "VertexName cannot be null: " + vertexName); - Preconditions.checkNotNull(inputName, "InputName cannot be null"); + Objects.requireNonNull(vertexName, "VertexName cannot be null: " + vertexName); + Objects.requireNonNull(inputName, "InputName cannot be null"); InitializerWrapper initializer = initializerMap.get(inputName); if (initializer == null) { 
pendingVertexRegistrations.put(inputName, new VertexUpdateRegistrationHolder(vertexName, stateSet)); @@ -257,7 +260,7 @@ private static class InputInitializerCallable implements private final UserGroupInformation ugi; private final AppContext appContext; - public InputInitializerCallable(InitializerWrapper initializer, UserGroupInformation ugi, + InputInitializerCallable(InitializerWrapper initializer, UserGroupInformation ugi, AppContext appContext) { this.initializerWrapper = initializer; this.ugi = ugi; @@ -269,9 +272,8 @@ public List call() throws Exception { List events = ugi.doAs(new PrivilegedExceptionAction>() { @Override public List run() throws Exception { - LOG.info( - "Starting InputInitializer for Input: " + initializerWrapper.getInput().getName() + - " on vertex " + initializerWrapper.getVertexLogIdentifier()); + LOG.info("Starting InputInitializer for Input: {} on vertex {}", initializerWrapper.getInput().getName(), + initializerWrapper.getVertexLogIdentifier()); try { TezUtilsInternal.setHadoopCallerContext(appContext.getHadoopShim(), initializerWrapper.vertexId); @@ -294,7 +296,7 @@ private static class InputInitializerCallback implements private final EventHandler eventHandler; private final TezVertexID vertexID; - public InputInitializerCallback(InitializerWrapper initializer, + InputInitializerCallback(InitializerWrapper initializer, EventHandler eventHandler, TezVertexID vertexID) { this.initializer = initializer; this.eventHandler = eventHandler; @@ -326,7 +328,7 @@ public void onFailure(Throwable t) { " on vertex " + initializer.getVertexLogIdentifier()); eventHandler .handle(new VertexEventRootInputFailed(vertexID, initializer.getInput().getName(), - new AMUserCodeException(Source.InputInitializer,t))); + new AMUserCodeException(Source.InputInitializer, t))); } } @@ -454,7 +456,7 @@ public void onTaskSucceeded(String vertexName, TezTaskID taskId, int attemptId) Iterator eventIterator = events.iterator(); while (eventIterator.hasNext()) { TezEvent tezEvent = eventIterator.next(); - int taskIndex = tezEvent.getSourceInfo().getTaskAttemptID().getTaskID().getId(); + int taskIndex = tezEvent.getSourceInfo().getTaskID().getId(); int taskAttemptIndex = tezEvent.getSourceInfo().getTaskAttemptID().getId(); if (taskIndex == taskId.getId()) { // Process only if there's a pending event for the specific succeeded task @@ -473,7 +475,7 @@ public void handleInputInitializerEvents(Collection tezEvents) { List toForwardEvents = new LinkedList(); for (TezEvent tezEvent : tezEvents) { String srcVertexName = tezEvent.getSourceInfo().getTaskVertexName(); - int taskIndex = tezEvent.getSourceInfo().getTaskAttemptID().getTaskID().getId(); + int taskIndex = tezEvent.getSourceInfo().getTaskID().getId(); int taskAttemptIndex = tezEvent.getSourceInfo().getTaskAttemptID().getId(); Map vertexSuccessfulAttemptMap = @@ -493,7 +495,7 @@ public void handleInputInitializerEvents(Collection tezEvents) { Vertex srcVertex = appContext.getCurrentDAG().getVertex(srcVertexName); Task task = srcVertex.getTask(taskIndex); if (task.getState() == TaskState.SUCCEEDED) { - successfulAttemptInteger = task.getSuccessfulAttempt().getID().getId(); + successfulAttemptInteger = task.getSuccessfulAttempt().getTaskAttemptID().getId(); vertexSuccessfulAttemptMap.put(taskIndex, successfulAttemptInteger); } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/StateChangeNotifier.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/StateChangeNotifier.java index bd04fd8913..7d13f930c0 100644 --- 
a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/StateChangeNotifier.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/StateChangeNotifier.java @@ -25,9 +25,9 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.Objects; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.collect.HashMultimap; import com.google.common.collect.LinkedListMultimap; import com.google.common.collect.ListMultimap; @@ -268,7 +268,7 @@ public int hashCode() { public void registerForTaskSuccessUpdates(String vertexName, TaskStateUpdateListener listener) { TezVertexID vertexId = validateAndGetVertexId(vertexName); - Preconditions.checkNotNull(listener, "listener cannot be null"); + Objects.requireNonNull(listener, "listener cannot be null"); taskWriteLock.lock(); try { taskListeners.put(vertexId, listener); @@ -279,7 +279,7 @@ public void registerForTaskSuccessUpdates(String vertexName, TaskStateUpdateList public void unregisterForTaskSuccessUpdates(String vertexName, TaskStateUpdateListener listener) { TezVertexID vertexId = validateAndGetVertexId(vertexName); - Preconditions.checkNotNull(listener, "listener cannot be null"); + Objects.requireNonNull(listener, "listener cannot be null"); taskWriteLock.lock(); try { taskListeners.remove(vertexId, listener); @@ -303,9 +303,9 @@ public void taskSucceeded(String vertexName, TezTaskID taskId, int attemptId) { private TezVertexID validateAndGetVertexId(String vertexName) { - Preconditions.checkNotNull(vertexName, "VertexName cannot be null"); + Objects.requireNonNull(vertexName, "VertexName cannot be null"); Vertex vertex = dag.getVertex(vertexName); - Preconditions.checkNotNull(vertex, "Vertex does not exist: " + vertexName); + Objects.requireNonNull(vertex, "Vertex does not exist: " + vertexName); return vertex.getVertexId(); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java index d1b9b2a6ce..cb1bc33866 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Task.java @@ -21,21 +21,22 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Set; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.TaskLocationHint; import org.apache.tez.dag.api.oldrecords.TaskReport; import org.apache.tez.dag.api.oldrecords.TaskState; +import org.apache.tez.dag.records.TaskIDAware; import org.apache.tez.dag.records.TezTaskAttemptID; -import org.apache.tez.dag.records.TezTaskID; import org.apache.tez.runtime.api.impl.TaskSpec; import org.apache.tez.runtime.api.impl.TezEvent; /** * Read only view of Task. */ -public interface Task { - TezTaskID getTaskId(); +public interface Task extends TaskIDAware { TaskReport getReport(); TaskState getState(); TezCounters getCounters(); @@ -73,4 +74,10 @@ public ArrayList getTaskAttemptTezEvents(TezTaskAttemptID attemptID, long getFirstAttemptStartTime(); long getFinishTime(); + + /** + * @return set of nodes on which previous attempts were running, at the time + * the latest attempt was scheduled.
+ */ + Set getNodesWithRunningAttempts(); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java index ba09bd9589..f51b576dfe 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/TaskAttempt.java @@ -21,6 +21,7 @@ import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; @@ -28,17 +29,15 @@ import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.oldrecords.TaskAttemptReport; import org.apache.tez.dag.api.oldrecords.TaskAttemptState; +import org.apache.tez.dag.records.TaskAttemptIDAware; import org.apache.tez.dag.records.TaskAttemptTerminationCause; -import org.apache.tez.dag.records.TezDAGID; import org.apache.tez.dag.records.TezTaskAttemptID; -import org.apache.tez.dag.records.TezTaskID; -import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.runtime.api.impl.TezEvent; /** * Read only view of TaskAttempt. */ -public interface TaskAttempt { +public interface TaskAttempt extends TaskAttemptIDAware { public static class TaskAttemptStatus { public TezTaskAttemptID id; @@ -65,17 +64,20 @@ public void setLocalityCounter(DAGCounter localityCounter) { } } } + + @VisibleForTesting + public void setCounters(TezCounters counters) { + this.counters = counters; + } } - - TezTaskAttemptID getID(); - TezTaskID getTaskID(); - TezVertexID getVertexID(); - TezDAGID getDAGID(); - + + Task getTask(); TaskAttemptReport getReport(); List getDiagnostics(); TaskAttemptTerminationCause getTerminationCause(); TezCounters getCounters(); + @VisibleForTesting + void setCounters(TezCounters counters); float getProgress(); TaskAttemptState getState(); TaskAttemptState getStateNoLock(); @@ -135,5 +137,4 @@ public void setLocalityCounter(DAGCounter localityCounter) { * yet, returns 0. */ long getFinishTime(); - } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java index 0a6e9c5bc9..ff83e19a85 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/Vertex.java @@ -26,6 +26,7 @@ import javax.annotation.Nullable; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.EdgeManagerPluginDescriptor; @@ -73,7 +74,6 @@ public interface Vertex extends Comparable { LinkedHashMap getIOIndices(); String getName(); VertexState getState(); - /** * Get all the counters of this vertex. 
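The swap from Guava's Preconditions.checkNotNull to java.util.Objects.requireNonNull, repeated across this patch, is behavior-preserving at these call sites: both throw NullPointerException carrying the supplied message and return the checked reference, and the JDK variant removes a third-party dependency from the null check. A minimal sketch:

import java.util.Objects;

public class RequireNonNullExample {
  public static void main(String[] args) {
    String listener = "myListener";
    // Before (Guava): Preconditions.checkNotNull(listener, "listener cannot be null");
    // After (plain JDK):
    String checked = Objects.requireNonNull(listener, "listener cannot be null");
    System.out.println(checked);

    try {
      Objects.requireNonNull(null, "listener cannot be null");
    } catch (NullPointerException e) {
      System.out.println("rejected: " + e.getMessage());
    }
  }
}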
* @return aggregate task-counters @@ -86,6 +86,12 @@ public interface Vertex extends Comparable { */ TezCounters getCachedCounters(); + /** + * Add custom counters to the vertex + * @param tezCounters counters to add + */ + void addCounters(TezCounters tezCounters); + int getMaxTaskConcurrency(); Map getTasks(); Task getTask(TezTaskID taskID); @@ -163,7 +169,10 @@ public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID, int fromEventId, int nextPreRoutedFromEventId, int maxEvents); void handleSpeculatorEvent(SpeculatorEvent event); - + AbstractService getSpeculator(); + void initServices(); + void startServices(); + void stopServices(); ProcessorDescriptor getProcessorDescriptor(); public DAG getDAG(); @Nullable @@ -205,6 +214,29 @@ public TaskAttemptEventInfo getTaskAttemptTezEvents(TezTaskAttemptID attemptID, interface VertexConfig { int getMaxFailedTaskAttempts(); + int getMaxTaskAttempts(); boolean getTaskRescheduleHigherPriority(); + boolean getTaskRescheduleRelaxedLocality(); + + /** + * @return tez.task.max.allowed.output.failures. + */ + int getMaxAllowedOutputFailures(); + /** + * @return tez.task.max.allowed.output.failures.fraction. + */ + double getMaxAllowedOutputFailuresFraction(); + /** + * @return tez.am.max.allowed.time-sec.for-read-error. + */ + int getMaxAllowedTimeForTaskReadErrorSec(); + /** + * @return tez.am.max.allowed.downstream.host.failures.fraction. + */ + double getMaxAllowedDownstreamHostFailuresFraction(); } + + void incrementRejectedTaskAttemptCount(); + int getRejectedTaskAttemptCount(); + Map> getDownstreamBlamingHosts(); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGAppMasterEventUserServiceFatalError.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGAppMasterEventUserServiceFatalError.java index 7bc3bd8e9e..728af783d5 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGAppMasterEventUserServiceFatalError.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGAppMasterEventUserServiceFatalError.java @@ -16,7 +16,7 @@ import java.util.EnumSet; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; public class DAGAppMasterEventUserServiceFatalError extends DAGAppMasterEvent implements DiagnosableEvent { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEvent.java index a0a8a1af8e..4eed8ff984 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEvent.java @@ -19,13 +19,14 @@ package org.apache.tez.dag.app.dag.event; import org.apache.tez.common.TezAbstractEvent; +import org.apache.tez.dag.records.DAGIDAware; import org.apache.tez.dag.records.TezDAGID; /** * This class encapsulates job related events. 
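Several interfaces touched in this patch (Task extends TaskIDAware earlier, TaskAttempt extends TaskAttemptIDAware, and DAGEvent implements DAGIDAware here) exist so call sites can collapse chains like attempt.getID().getTaskID().getVertexID() into a single getVertexID(). The Tez interface bodies are not part of this diff, so the following is a hypothetical reconstruction of the idea using simplified id types, not a copy of the real TaskAttemptIDAware:

// Simplified stand-ins for TezVertexID / TezTaskID / TezTaskAttemptID.
final class VertexId { final int id; VertexId(int id) { this.id = id; } }
final class TaskId { final VertexId vertex; final int id; TaskId(VertexId v, int id) { this.vertex = v; this.id = id; } }
final class AttemptId { final TaskId task; final int id; AttemptId(TaskId t, int id) { this.task = t; this.id = id; } }

// One abstract getter plus default methods that walk up the id chain: any event
// or entity implementing this gets getTaskID()/getVertexID() for free.
interface AttemptIdAware {
  AttemptId getTaskAttemptID();
  default TaskId getTaskID() { return getTaskAttemptID().task; }
  default VertexId getVertexID() { return getTaskID().vertex; }
}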
* */ -public class DAGEvent extends TezAbstractEvent { +public class DAGEvent extends TezAbstractEvent implements DAGIDAware { private TezDAGID dagId; @@ -34,7 +35,8 @@ public DAGEvent(TezDAGID dagId, DAGEventType type) { this.dagId = dagId; } - public TezDAGID getDAGId() { + @Override + public TezDAGID getDAGID() { return dagId; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventCounterUpdate.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventCounterUpdate.java index da0724dd20..3683a4951b 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventCounterUpdate.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventCounterUpdate.java @@ -29,7 +29,7 @@ public class DAGEventCounterUpdate extends DAGEvent { public DAGEventCounterUpdate(TezDAGID dagId) { super(dagId, DAGEventType.DAG_COUNTER_UPDATE); - counterUpdates = new ArrayList(); + counterUpdates = new ArrayList<>(); } public void addCounterUpdate(Enum key, long incrValue) { @@ -56,5 +56,10 @@ public Enum getCounterKey() { public long getIncrementValue() { return incrValue; } + + @Override + public String toString(){ + return String.format("DAGEventCounterUpdate.CounterIncrementalUpdate(key=%s, incrValue=%d)", key, incrValue); + } } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventSchedulerUpdate.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventSchedulerUpdate.java index eda02b52da..1dedaecad9 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventSchedulerUpdate.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventSchedulerUpdate.java @@ -19,9 +19,11 @@ package org.apache.tez.dag.app.dag.event; import org.apache.tez.dag.app.dag.TaskAttempt; +import org.apache.tez.dag.records.TaskAttemptIDAware; +import org.apache.tez.dag.records.TezTaskAttemptID; + +public class DAGEventSchedulerUpdate extends DAGEvent implements TaskAttemptIDAware { -public class DAGEventSchedulerUpdate extends DAGEvent { - public enum UpdateType { TA_SCHEDULE, TA_COMPLETED @@ -31,7 +33,7 @@ public enum UpdateType { private final UpdateType updateType; public DAGEventSchedulerUpdate(UpdateType updateType, TaskAttempt attempt) { - super(attempt.getDAGID(), + super(attempt.getDAGID(), DAGEventType.DAG_SCHEDULER_UPDATE); this.attempt = attempt; this.updateType = updateType; @@ -44,4 +46,9 @@ public UpdateType getUpdateType() { public TaskAttempt getAttempt() { return attempt; } + + @Override + public TezTaskAttemptID getTaskAttemptID() { + return attempt.getTaskAttemptID(); + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventVertexCompleted.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventVertexCompleted.java index e58d46e834..2c18be85dd 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventVertexCompleted.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventVertexCompleted.java @@ -29,7 +29,7 @@ public class DAGEventVertexCompleted extends DAGEvent { private final VertexTerminationCause terminationCause; public DAGEventVertexCompleted(TezVertexID vertexId, VertexState vertexState) { - super(vertexId.getDAGId(), DAGEventType.DAG_VERTEX_COMPLETED); + super(vertexId.getDAGID(), DAGEventType.DAG_VERTEX_COMPLETED); this.vertexId = vertexId; this.vertexState = vertexState; this.terminationCause = null; @@ -37,7 +37,7 @@ public DAGEventVertexCompleted(TezVertexID vertexId, VertexState vertexState) { public 
DAGEventVertexCompleted(TezVertexID vertexId, VertexState vertexState, VertexTerminationCause terminationCause) { - super(vertexId.getDAGId(), DAGEventType.DAG_VERTEX_COMPLETED); + super(vertexId.getDAGID(), DAGEventType.DAG_VERTEX_COMPLETED); this.vertexId = vertexId; this.vertexState = vertexState; this.terminationCause = terminationCause; diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventVertexReRunning.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventVertexReRunning.java index 303d48d375..30e595906b 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventVertexReRunning.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/DAGEventVertexReRunning.java @@ -25,7 +25,7 @@ public class DAGEventVertexReRunning extends DAGEvent { private TezVertexID vertexId; public DAGEventVertexReRunning(TezVertexID vertexId) { - super(vertexId.getDAGId(), DAGEventType.DAG_VERTEX_RERUNNING); + super(vertexId.getDAGID(), DAGEventType.DAG_VERTEX_RERUNNING); this.vertexId = vertexId; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/SpeculatorEventTaskAttemptStatusUpdate.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/SpeculatorEventTaskAttemptStatusUpdate.java index d5745c4df4..7ab6141c39 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/SpeculatorEventTaskAttemptStatusUpdate.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/SpeculatorEventTaskAttemptStatusUpdate.java @@ -34,7 +34,7 @@ public SpeculatorEventTaskAttemptStatusUpdate(TezTaskAttemptID taId, TaskAttempt public SpeculatorEventTaskAttemptStatusUpdate(TezTaskAttemptID taId, TaskAttemptState state, long timestamp, boolean justStarted) { - super(SpeculatorEventType.S_TASK_ATTEMPT_STATUS_UPDATE, taId.getTaskID().getVertexID()); + super(SpeculatorEventType.S_TASK_ATTEMPT_STATUS_UPDATE, taId.getVertexID()); this.id = taId; this.state = state; this.timestamp = timestamp; diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEvent.java index 63ef70feb4..d5d8481a54 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEvent.java @@ -19,13 +19,14 @@ package org.apache.tez.dag.app.dag.event; import org.apache.tez.common.TezAbstractEvent; +import org.apache.tez.dag.records.TaskAttemptIDAware; import org.apache.tez.dag.records.TezTaskAttemptID; /** * This class encapsulates task attempt related events. 
* */ -public class TaskAttemptEvent extends TezAbstractEvent { +public class TaskAttemptEvent extends TezAbstractEvent implements TaskAttemptIDAware { private TezTaskAttemptID attemptID; @@ -39,6 +40,7 @@ public TaskAttemptEvent(TezTaskAttemptID id, TaskAttemptEventType type) { this.attemptID = id; } + @Override public TezTaskAttemptID getTaskAttemptID() { return attemptID; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java index 299847c31a..d4c7273c97 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventAttemptFailed.java @@ -18,7 +18,8 @@ package org.apache.tez.dag.app.dag.event; -import com.google.common.base.Preconditions; +import java.util.Objects; + import org.apache.tez.dag.records.TaskAttemptTerminationCause; import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.runtime.api.TaskFailureType; @@ -44,7 +45,7 @@ public TaskAttemptEventAttemptFailed(TezTaskAttemptID id, TaskAttemptEventType type, TaskFailureType taskFailureType, String diagnostics, TaskAttemptTerminationCause errorCause, boolean isFromRecovery) { super(id, type); - Preconditions.checkNotNull(taskFailureType, "FailureType must be set for a FAILED task attempt"); + Objects.requireNonNull(taskFailureType, "FailureType must be set for a FAILED task attempt"); this.diagnostics = diagnostics; this.errorCause = errorCause; this.taskFailureType = taskFailureType; diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventOutputFailed.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventOutputFailed.java index 6bc110a1a0..fbdd2305c7 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventOutputFailed.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskAttemptEventOutputFailed.java @@ -28,9 +28,9 @@ public class TaskAttemptEventOutputFailed extends TaskAttemptEvent private TezEvent inputFailedEvent; private int consumerTaskNumber; - public TaskAttemptEventOutputFailed(TezTaskAttemptID attemptId, + public TaskAttemptEventOutputFailed(TezTaskAttemptID sourceTaskAttemptId, TezEvent tezEvent, int numConsumers) { - super(attemptId, TaskAttemptEventType.TA_OUTPUT_FAILED); + super(sourceTaskAttemptId, TaskAttemptEventType.TA_OUTPUT_FAILED); this.inputFailedEvent = tezEvent; this.consumerTaskNumber = numConsumers; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEvent.java index def9ddfa7c..9e741388fa 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEvent.java @@ -19,13 +19,14 @@ package org.apache.tez.dag.app.dag.event; import org.apache.tez.common.TezAbstractEvent; +import org.apache.tez.dag.records.TaskIDAware; import org.apache.tez.dag.records.TezTaskID; /** * this class encapsulates task related events. 
* */ -public class TaskEvent extends TezAbstractEvent { +public class TaskEvent extends TezAbstractEvent implements TaskIDAware { private TezTaskID taskId; @@ -34,6 +35,7 @@ public TaskEvent(TezTaskID taskId, TaskEventType type) { this.taskId = taskId; } + @Override public TezTaskID getTaskID() { return taskId; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEventTAFailed.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEventTAFailed.java index d6f1526c3a..f68549dd6b 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEventTAFailed.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/TaskEventTAFailed.java @@ -18,7 +18,8 @@ package org.apache.tez.dag.app.dag.event; -import com.google.common.base.Preconditions; +import java.util.Objects; + import org.apache.tez.common.TezAbstractEvent; import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.runtime.api.TaskFailureType; @@ -31,7 +32,7 @@ public class TaskEventTAFailed extends TaskEventTAUpdate { public TaskEventTAFailed(TezTaskAttemptID id, TaskFailureType taskFailureType, TezAbstractEvent causalEvent) { super(id, TaskEventType.T_ATTEMPT_FAILED); - Preconditions.checkNotNull(taskFailureType, "FailureType must be specified for a failed attempt"); + Objects.requireNonNull(taskFailureType, "FailureType must be specified for a failed attempt"); this.taskFailureType = taskFailureType; this.causalEvent = causalEvent; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEvent.java index 33128e4536..6957a50dc7 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEvent.java @@ -20,12 +20,13 @@ import org.apache.tez.common.TezAbstractEvent; import org.apache.tez.dag.records.TezVertexID; +import org.apache.tez.dag.records.VertexIDAware; /** * this class encapsulates vertex related events. 
* */ -public class VertexEvent extends TezAbstractEvent { +public class VertexEvent extends TezAbstractEvent implements VertexIDAware { private TezVertexID vertexId; @@ -34,7 +35,8 @@ public VertexEvent(TezVertexID vertexId, VertexEventType type) { this.vertexId = vertexId; } - public TezVertexID getVertexId() { + @Override + public TezVertexID getVertexID() { return vertexId; } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventTaskAttemptCompleted.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventTaskAttemptCompleted.java index 5b07674422..6954f12e89 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventTaskAttemptCompleted.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventTaskAttemptCompleted.java @@ -24,21 +24,21 @@ public class VertexEventTaskAttemptCompleted extends VertexEvent { private TezTaskAttemptID attemptId; - private TaskAttemptStateInternal attempState; - + private TaskAttemptStateInternal attemptState; + public VertexEventTaskAttemptCompleted(TezTaskAttemptID taskAttemptId, TaskAttemptStateInternal state) { - super(taskAttemptId.getTaskID().getVertexID(), + super(taskAttemptId.getVertexID(), VertexEventType.V_TASK_ATTEMPT_COMPLETED); this.attemptId = taskAttemptId; - this.attempState = state; + this.attemptState = state; } public TezTaskAttemptID getTaskAttemptId() { return attemptId; } - + public TaskAttemptStateInternal getTaskAttemptState() { - return attempState; + return attemptState; } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventType.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventType.java index 15be94dfa1..ed325290cb 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventType.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexEventType.java @@ -34,6 +34,7 @@ public enum VertexEventType { V_START, V_SOURCE_TASK_ATTEMPT_COMPLETED, V_SOURCE_VERTEX_STARTED, + V_DELETE_SHUFFLE_DATA, //Producer:Task V_TASK_COMPLETED, diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexShuffleDataDeletion.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexShuffleDataDeletion.java new file mode 100644 index 0000000000..8ea3a154b3 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/event/VertexShuffleDataDeletion.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tez.dag.app.dag.event; + +import org.apache.tez.dag.app.dag.Vertex; + + +public class VertexShuffleDataDeletion extends VertexEvent { + // child vertex + private Vertex sourceVertex; + // parent vertex + private Vertex targetVertex; + + public VertexShuffleDataDeletion(Vertex sourceVertex, Vertex targetVertex) { + super(targetVertex.getVertexId(), VertexEventType.V_DELETE_SHUFFLE_DATA); + this.sourceVertex = sourceVertex; + this.targetVertex = targetVertex; + } + + public Vertex getSourceVertex() { + return sourceVertex; + } + + public Vertex getTargetVertex() { + return targetVertex; + } +} \ No newline at end of file diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java index ca510f7fc4..867403fcd5 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/BroadcastEdgeManager.java @@ -53,8 +53,7 @@ public int getNumSourceTaskPhysicalOutputs(int sourceTaskIndex) { public void routeDataMovementEventToDestination(DataMovementEvent event, int sourceTaskIndex, int sourceOutputIndex, Map> destinationTaskAndInputIndices) { - List inputIndices = - Collections.unmodifiableList(Collections.singletonList(sourceTaskIndex)); + List inputIndices = Collections.singletonList(sourceTaskIndex); // for each task make the i-th source task as the i-th physical input for (int i=0; i> destinationTaskAndInputIndices) { - List inputIndices = - Collections.unmodifiableList(Collections.singletonList(sourceTaskIndex)); + List inputIndices = Collections.singletonList(sourceTaskIndex); // for each task make the i-th source task as the i-th physical input for (int i=0; i amInfos; private final Lock dagStatusLock = new ReentrantLock(); - private final Condition dagCompletionCondition = dagStatusLock.newCondition(); + private final Condition dagStateChangedCondition = dagStatusLock.newCondition(); private final AtomicBoolean isFinalState = new AtomicBoolean(false); + private final AtomicBoolean runningStatusYetToBeConsumed = new AtomicBoolean(false); private final Lock readLock; private final Lock writeLock; private final String dagName; @@ -190,6 +203,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG, volatile Map vertices = new HashMap(); @VisibleForTesting Map edges = new HashMap(); + ArrayList vertexDescendants; private TezCounters dagCounters = new TezCounters(); private Object fullCountersLock = new Object(); @VisibleForTesting @@ -218,6 +232,7 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG, private TaskSpecificLaunchCmdOption taskSpecificLaunchCmdOption; private static final DagStateChangedCallback STATE_CHANGED_CALLBACK = new DagStateChangedCallback(); + private String[] logDirs; @VisibleForTesting Map> commitFutures @@ -234,6 +249,11 @@ public class DAGImpl implements org.apache.tez.dag.app.dag.DAG, private static final CommitCompletedTransition COMMIT_COMPLETED_TRANSITION = new CommitCompletedTransition(); + private final MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean(); + private final Set containersUsedByCurrentDAG = new HashSet<>(); + @VisibleForTesting + final Set nodesUsedByCurrentDAG = new HashSet<>(); + protected static final StateMachineFactory stateMachineFactory @@ -563,6 +583,8 @@ public DAGImpl(TezDAGID dagId, private void augmentStateMachine() { stateMachine + .registerStateEnteredCallback(DAGState.RUNNING, 
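The new VertexShuffleDataDeletion event above pairs the completed downstream vertex (source) with the upstream vertex (target) whose intermediate shuffle output may now be reclaimed; the event is addressed to the target's vertex id. The runnable sketch below only mimics that flow with plain types; the vertex names and the handler are invented stand-ins for the AM's event dispatch, not Tez code:

import java.util.function.Consumer;

public class ShuffleCleanupSketch {
  static final class DeletionEvent {
    final String sourceVertex; // downstream vertex that finished consuming
    final String targetVertex; // upstream vertex whose shuffle data is deletable
    DeletionEvent(String source, String target) {
      this.sourceVertex = source;
      this.targetVertex = target;
    }
  }

  public static void main(String[] args) {
    Consumer<DeletionEvent> handler = e ->
        System.out.println("delete shuffle data of " + e.targetVertex
            + " (triggered by completion of " + e.sourceVertex + ")");
    handler.accept(new DeletionEvent("reducer_v2", "mapper_v1"));
  }
}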
+ STATE_CHANGED_CALLBACK) .registerStateEnteredCallback(DAGState.SUCCEEDED, STATE_CHANGED_CALLBACK) .registerStateEnteredCallback(DAGState.FAILED, @@ -577,10 +599,22 @@ private static class DagStateChangedCallback implements OnStateChangedCallback { @Override public void onStateChanged(DAGImpl dag, DAGState dagState) { - dag.isFinalState.set(true); + switch(dagState) { + case RUNNING: + dag.runningStatusYetToBeConsumed.set(true); + break; + case SUCCEEDED: + case FAILED: + case KILLED: + case ERROR: + dag.isFinalState.set(true); + break; + default: + break; + } dag.dagStatusLock.lock(); try { - dag.dagCompletionCondition.signal(); + dag.dagStateChangedCondition.signal(); } finally { dag.dagStatusLock.unlock(); } @@ -698,7 +732,7 @@ public TezCounters getAllCounters() { updateCpuCounters(); TezCounters counters = new TezCounters(); counters.incrAllCounters(dagCounters); - return incrTaskCounters(counters, vertices.values()); + return aggrTaskCounters(counters, vertices.values()); } finally { readLock.unlock(); } @@ -713,7 +747,7 @@ public TezCounters getCachedCounters() { try { // FIXME a better lightweight approach for counters is needed if (fullCounters == null && cachedCounters != null - && ((cachedCountersTimestamp+10000) > System.currentTimeMillis())) { + && ((cachedCountersTimestamp + 10000) - System.currentTimeMillis() > 0)) { LOG.info("Asked for counters" + ", cachedCountersTimestamp=" + cachedCountersTimestamp + ", currentTime=" + System.currentTimeMillis()); @@ -730,7 +764,7 @@ public TezCounters getCachedCounters() { updateCpuCounters(); TezCounters counters = new TezCounters(); counters.incrAllCounters(dagCounters); - return incrTaskCounters(counters, vertices.values()); + return aggrTaskCounters(counters, vertices.values()); } finally { readLock.unlock(); } @@ -746,10 +780,10 @@ boolean inTerminalState() { return false; } - public static TezCounters incrTaskCounters( + public static TezCounters aggrTaskCounters( TezCounters counters, Collection vertices) { for (Vertex vertex : vertices) { - counters.incrAllCounters(vertex.getAllCounters()); + counters.aggrAllCounters(vertex.getAllCounters()); } return counters; } @@ -800,11 +834,30 @@ public DAGReport getReport() { public float getProgress() { this.readLock.lock(); try { - float progress = 0.0f; + float accProg = 0.0f; + float dagProgress = 0.0f; + int verticesCount = getVertices().size(); for (Vertex v : getVertices().values()) { - progress += v.getProgress(); + float vertexProgress = v.getProgress(); + if (LOG.isDebugEnabled()) { + if (!ProgressHelper.isProgressWithinRange(vertexProgress)) { + LOG.debug("progress update: Vertex progress is outside the valid range" + + "; v={}, progress={}", v.getName(), vertexProgress); + } + } + accProg += ProgressHelper.processProgress(vertexProgress); + } + if (LOG.isDebugEnabled()) { + if (verticesCount == 0) { + LOG.debug("progress update: DAGImpl getProgress() returns 0.0f: " + + "vertices count is 0"); + } + } + if (verticesCount > 0) { + dagProgress = + ProgressHelper.processProgress(accProg / verticesCount); } - return progress / getTotalVertices(); + return dagProgress; } finally { this.readLock.unlock(); } @@ -867,6 +920,7 @@ public DAGStatusBuilder getDAGStatus(Set statusOptions) { int totalKilledTaskCount = 0; int totalFailedTaskAttemptCount = 0; int totalKilledTaskAttemptCount = 0; + int totalRejectedTaskAttemptCount = 0; readLock.lock(); try { for(Map.Entry entry : vertexMap.entrySet()) { @@ -879,6 +933,7 @@ public DAGStatusBuilder getDAGStatus(Set statusOptions) { totalKilledTaskCount +=
progress.getKilledTaskCount(); totalFailedTaskAttemptCount += progress.getFailedTaskAttemptCount(); totalKilledTaskAttemptCount += progress.getKilledTaskAttemptCount(); + totalRejectedTaskAttemptCount += progress.getRejectedTaskAttemptCount(); } ProgressBuilder dagProgress = new ProgressBuilder(); dagProgress.setTotalTaskCount(totalTaskCount); @@ -888,12 +943,17 @@ public DAGStatusBuilder getDAGStatus(Set statusOptions) { dagProgress.setKilledTaskCount(totalKilledTaskCount); dagProgress.setFailedTaskAttemptCount(totalFailedTaskAttemptCount); dagProgress.setKilledTaskAttemptCount(totalKilledTaskAttemptCount); + dagProgress.setRejectedTaskAttemptCount(totalRejectedTaskAttemptCount); status.setState(getState()); status.setDiagnostics(diagnostics); status.setDAGProgress(dagProgress); if (statusOptions.contains(StatusGetOpts.GET_COUNTERS)) { status.setDAGCounters(getAllCounters()); } + if (statusOptions.contains(StatusGetOpts.GET_MEMORY_USAGE)) { + status.setMemoryUsage(memoryMXBean.getHeapMemoryUsage().getUsed(), + taskCommunicatorManagerInterface.getTotalUsedMemory()); + } return status; } finally { readLock.unlock(); @@ -918,7 +978,11 @@ public DAGStatusBuilder getDAGStatus(Set statusOptions, if (isFinalState.get()) { break; } - nanosLeft = dagCompletionCondition.awaitNanos(timeoutNanos); + if (runningStatusYetToBeConsumed.compareAndSet(true, false)) { + // No need to wait further, as state just got changed to RUNNING + break; + } + nanosLeft = dagStateChangedCondition.awaitNanos(timeoutNanos); } catch (InterruptedException e) { throw new TezException("Interrupted while waiting for dag to complete", e); } finally { @@ -942,6 +1006,7 @@ private ProgressBuilder getDAGProgress() { int totalKilledTaskCount = 0; int totalFailedTaskAttemptCount = 0; int totalKilledTaskAttemptCount = 0; + int totalRejectedTaskAttemptCount = 0; readLock.lock(); try { for(Map.Entry entry : vertexMap.entrySet()) { @@ -953,6 +1018,7 @@ private ProgressBuilder getDAGProgress() { totalKilledTaskCount += progress.getKilledTaskCount(); totalFailedTaskAttemptCount += progress.getFailedTaskAttemptCount(); totalKilledTaskAttemptCount += progress.getKilledTaskAttemptCount(); + totalRejectedTaskAttemptCount += progress.getRejectedTaskAttemptCount(); } ProgressBuilder dagProgress = new ProgressBuilder(); dagProgress.setTotalTaskCount(totalTaskCount); @@ -962,6 +1028,7 @@ private ProgressBuilder getDAGProgress() { dagProgress.setKilledTaskCount(totalKilledTaskCount); dagProgress.setFailedTaskAttemptCount(totalFailedTaskAttemptCount); dagProgress.setKilledTaskAttemptCount(totalKilledTaskAttemptCount); + dagProgress.setRejectedTaskAttemptCount(totalRejectedTaskAttemptCount); return dagProgress; } finally { readLock.unlock(); @@ -979,7 +1046,7 @@ public VertexStatusBuilder getVertexStatus(String vertexName, } public TaskAttemptImpl getTaskAttempt(TezTaskAttemptID taId) { - return (TaskAttemptImpl) getVertex(taId.getTaskID().getVertexID()).getTask(taId.getTaskID()) + return (TaskAttemptImpl) getVertex(taId.getVertexID()).getTask(taId.getTaskID()) .getAttempt(taId); } @@ -1112,7 +1179,7 @@ public Void call() throws Exception { } for (Map.Entry entry : commitEvents.entrySet()) { ListenableFuture commitFuture = appContext.getExecService().submit(entry.getValue()); - Futures.addCallback(commitFuture, entry.getValue().getCallback()); + Futures.addCallback(commitFuture, entry.getValue().getCallback(), GuavaShim.directExecutor()); commitFutures.put(entry.getKey(), commitFuture); } } @@ -1145,7 +1212,7 @@ private void abortOutputs() { */ 
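The rewritten getProgress shown above no longer divides a raw sum by getTotalVertices(); it passes every per-vertex value through ProgressHelper first, so one vertex reporting NaN or a value outside [0, 1] cannot drag DAG progress out of range. ProgressHelper's source is not part of this diff, so the sketch below encodes the contract its use implies, as an assumption rather than a copy:

public class ProgressClampSketch {
  // Assumed behavior of ProgressHelper.isProgressWithinRange.
  static boolean isProgressWithinRange(float p) {
    return p >= 0.0f && p <= 1.0f; // NaN compares false, so it is out of range too
  }

  // Assumed behavior of ProgressHelper.processProgress: collapse bad values into [0, 1].
  static float processProgress(float p) {
    if (Float.isNaN(p) || p < 0.0f) {
      return 0.0f;
    }
    return Math.min(p, 1.0f);
  }

  public static void main(String[] args) {
    float[] vertexProgress = {0.5f, 1.2f, Float.NaN}; // one healthy, two misbehaving reporters
    float acc = 0.0f;
    for (float p : vertexProgress) {
      acc += processProgress(p);
    }
    // (0.5 + 1.0 + 0.0) over 3 vertices => 0.5
    System.out.println("dag progress = " + processProgress(acc / vertexProgress.length));
  }
}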
public void handle(DAGEvent event) { if (LOG.isDebugEnabled()) { - LOG.debug("Processing DAGEvent " + event.getDAGId() + " of type " + LOG.debug("Processing DAGEvent " + event.getDAGID() + " of type " + event.getType() + " while in state " + getInternalState() + ". Event: " + event); } @@ -1379,42 +1446,55 @@ private void updateCpuCounters() { dagCounters.findCounter(DAGCounter.AM_GC_TIME_MILLIS).setValue(totalDAGGCTime); } - private DAGState finished(DAGState finalState) { - if (finishTime == 0) { - setFinishTime(); - } - entityUpdateTracker.stop(); + @Override + public void incrementDagCounter(DAGCounter counter, int incrValue) { + dagCounters.findCounter(counter).increment(incrValue); + } - boolean recoveryError = false; + @Override + public void setDagCounter(DAGCounter counter, int setValue) { + dagCounters.findCounter(counter).setValue(setValue); + } - // update cpu time counters before finishing the dag - updateCpuCounters(); - TezCounters counters = null; + private DAGState finished(DAGState finalState) { + boolean dagError = false; try { - counters = getAllCounters(); - } catch (LimitExceededException e) { - addDiagnostic("Counters limit exceeded: " + e.getMessage()); - finalState = DAGState.FAILED; - } + if (finishTime == 0) { + setFinishTime(); + } + entityUpdateTracker.stop(); - try { - if (finalState == DAGState.SUCCEEDED) { - logJobHistoryFinishedEvent(counters); - } else { - logJobHistoryUnsuccesfulEvent(finalState, counters); + // update cpu time counters before finishing the dag + updateCpuCounters(); + TezCounters counters = null; + try { + counters = constructFinalFullcounters(); + } catch (LimitExceededException e) { + addDiagnostic("Counters limit exceeded: " + e.getMessage()); + finalState = DAGState.FAILED; } - } catch (IOException e) { - LOG.warn("Failed to persist recovery event for DAG completion" - + ", dagId=" + dagId - + ", finalState=" + finalState); - recoveryError = true; - } - if (finalState != DAGState.SUCCEEDED) { - abortOutputs(); - } + try { + if (finalState == DAGState.SUCCEEDED) { + logJobHistoryFinishedEvent(counters); + } else { + logJobHistoryUnsuccesfulEvent(finalState, counters); + } + } catch (IOException e) { + LOG.warn("Failed to persist recovery event for DAG completion" + + ", dagId=" + dagId + + ", finalState=" + finalState, e); + dagError = true; + } - if (recoveryError) { + if (finalState != DAGState.SUCCEEDED) { + abortOutputs(); + } + } catch (Exception e) { + dagError = true; + LOG.warn("Encountered exception while finishing DAG", e); + } + if (dagError) { eventHandler.handle(new DAGAppMasterEventDAGFinished(getID(), DAGState.ERROR)); } else { eventHandler.handle(new DAGAppMasterEventDAGFinished(getID(), finalState)); @@ -1451,6 +1531,16 @@ public int getTotalVertices() { } + @Override + public BitSet getVertexDescendants(int vertexIndex) { + readLock.lock(); + try { + return vertexDescendants.get(vertexIndex); + } finally { + readLock.unlock(); + } + } + @Override public int getSuccessfulVertices() { readLock.lock(); @@ -1554,6 +1644,8 @@ DAGState initializeDAG() { parseVertexEdges(this, edgePlans, v); } + computeVertexDescendants(); + // Initialize the edges, now that the payload and vertices have been set.
for (Edge e : edges.values()) { try { @@ -1586,16 +1678,19 @@ DAGState initializeDAG() { if (!groupInfo.outputs.isEmpty()) { // shared outputs for (String vertexName : groupInfo.groupMembers) { - if (LOG.isDebugEnabled()) { - LOG.debug("Setting shared outputs for group: " + groupName + - " on vertex: " + vertexName); - } + LOG.debug("Setting shared outputs for group: {} on vertex: {}", groupName, vertexName); Vertex v = getVertex(vertexName); v.addSharedOutputs(groupInfo.outputs); } } } + // This is going to override the previously generated file + // which didn't have the priorities + if (getConf().getBoolean(TezConfiguration.TEZ_GENERATE_DEBUG_ARTIFACTS, + TezConfiguration.TEZ_GENERATE_DEBUG_ARTIFACTS_DEFAULT)) { + Utils.generateDAGVizFile(this, jobPlan, logDirs, dagScheduler); + } return DAGState.INITED; } @@ -1610,6 +1705,31 @@ private void createDAGEdges(DAGImpl dag) throws TezException { } } + private void computeVertexDescendants() { + vertexDescendants = new ArrayList<>(numVertices); + for (int i = 0; i < numVertices; ++i) { + vertexDescendants.add(new BitSet(numVertices)); + } + BitSet verticesVisited = new BitSet(numVertices); + for (Vertex v : vertices.values()) { + computeVertexDescendants(verticesVisited, v); + } + } + + private BitSet computeVertexDescendants(BitSet verticesVisited, Vertex v) { + int vertexIndex = v.getVertexId().getId(); + BitSet descendants = vertexDescendants.get(vertexIndex); + if (!verticesVisited.get(vertexIndex)) { + for (Vertex child : v.getOutputVertices().keySet()) { + descendants.set(child.getVertexId().getId()); + BitSet childDescendants = computeVertexDescendants(verticesVisited, child); + descendants.or(childDescendants); + } + verticesVisited.set(vertexIndex); + } + return descendants; + } + private static void assignDAGScheduler(DAGImpl dag) throws TezException { String dagSchedulerClassName = dag.dagConf.get(TezConfiguration.TEZ_AM_DAG_SCHEDULER_CLASS, TezConfiguration.TEZ_AM_DAG_SCHEDULER_CLASS_DEFAULT); @@ -1668,6 +1788,13 @@ private static void parseVertexEdges(DAGImpl dag, Map edgePlan vertex.setInputVertices(inVertices); vertex.setOutputVertices(outVertices); + boolean cleanupShuffleDataAtVertexLevel = dag.dagConf.getInt(TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT, + TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT_DEFAULT) > 0 && ShuffleUtils.isTezShuffleHandler(dag.dagConf); + if (cleanupShuffleDataAtVertexLevel) { + int deletionHeight = dag.dagConf.getInt(TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT, + TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT_DEFAULT); + ((VertexImpl) vertex).initShuffleDeletionContext(deletionHeight); + } } /** @@ -1820,17 +1947,18 @@ private void mayBeConstructFinalFullCounters() { // Already constructed. Just return. 
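computeVertexDescendants above performs a memoized depth-first walk that fills, for every vertex, a BitSet with one bit per transitive descendant, so later reachability checks become O(1) bit tests instead of graph walks. A small self-contained illustration of consuming such a set (the vertex indices are invented):

import java.util.BitSet;

public class DescendantBitSetSketch {
  public static void main(String[] args) {
    int numVertices = 4;
    // Pretend getVertexDescendants(0) reported vertices 1, 2 and 3 downstream of vertex 0.
    BitSet descendantsOfV0 = new BitSet(numVertices);
    descendantsOfV0.set(1);
    descendantsOfV0.set(2);
    descendantsOfV0.set(3);

    System.out.println("is v2 downstream of v0? " + descendantsOfV0.get(2));
    // Standard idiom for iterating only the set bits.
    for (int v = descendantsOfV0.nextSetBit(0); v >= 0; v = descendantsOfV0.nextSetBit(v + 1)) {
      System.out.println("descendant vertex index: " + v);
    }
  }
}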
return; } - this.constructFinalFullcounters(); + this.fullCounters = this.constructFinalFullcounters(); } } @Private - public void constructFinalFullcounters() { - this.fullCounters = new TezCounters(); - this.fullCounters.incrAllCounters(dagCounters); + public TezCounters constructFinalFullcounters() { + final AggregateTezCounters aggregateTezCounters = new AggregateTezCounters(); + aggregateTezCounters.aggrAllCounters(dagCounters); for (Vertex v : this.vertices.values()) { - this.fullCounters.incrAllCounters(v.getAllCounters()); + aggregateTezCounters.aggrAllCounters(v.getAllCounters()); } + return aggregateTezCounters; } /** @@ -2082,7 +2210,8 @@ public Void call() throws Exception { }; }; ListenableFuture groupCommitFuture = appContext.getExecService().submit(groupCommitCallableEvent); - Futures.addCallback(groupCommitFuture, groupCommitCallableEvent.getCallback()); + Futures.addCallback(groupCommitFuture, groupCommitCallableEvent.getCallback(), + GuavaShim.directExecutor()); commitFutures.put(outputKey, groupCommitFuture); } } @@ -2332,6 +2461,11 @@ public boolean isComplete() { } } + @Override + public DAGScheduler getDAGScheduler() { + return dagScheduler; + } + // output of either vertex or vertex group public static class OutputKey { String outputName; @@ -2410,4 +2544,52 @@ public void onFailure(Throwable t) { eventHandler.handle(new DAGEventCommitCompleted(dagId, outputKey, false, t)); } } + + public String[] getLogDirs() { + if (logDirs == null) { + logDirs = TezCommonUtils + .getTrimmedStrings(System.getenv(ApplicationConstants.Environment.LOG_DIRS.name())); + } + return logDirs; + } + + public DAGImpl setLogDirs(String[] logDirs) { + this.logDirs = logDirs; + return this; + } + + @Override + public void onStart() { + startVertexServices(); + } + + @Override + public void onFinish() { + stopVertexServices(); + updateCounters(); + } + + private void startVertexServices() { + for (Vertex v : getVertices().values()) { + v.startServices(); + } + } + + void stopVertexServices() { + for (Vertex v : getVertices().values()) { + v.stopServices(); + } + } + + @Override + public void addUsedContainer(Container container) { + containersUsedByCurrentDAG.add(container.getId()); + nodesUsedByCurrentDAG.add(container.getNodeId().getHost()); + } + + private void updateCounters() { + setDagCounter(DAGCounter.TOTAL_CONTAINERS_USED, containersUsedByCurrentDAG.size()); + setDagCounter(DAGCounter.NODE_USED_COUNT, nodesUsedByCurrentDAG.size()); + setDagCounter(DAGCounter.NODE_TOTAL_COUNT, appContext.getTaskScheduler().getNumClusterNodes(true)); + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGSchedulerNaturalOrder.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGSchedulerNaturalOrder.java index 3a16f46f97..78860868dd 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGSchedulerNaturalOrder.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGSchedulerNaturalOrder.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -46,19 +46,18 @@ public DAGSchedulerNaturalOrder(DAG dag, EventHandler dispatcher) { public void scheduleTaskEx(DAGEventSchedulerUpdate event) { TaskAttempt attempt = event.getAttempt(); Vertex vertex = dag.getVertex(attempt.getVertexID()); - int vertexDistanceFromRoot = vertex.getDistanceFromRoot(); // natural priority. Handles failures and retries. 
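The Futures.addCallback call sites in this patch (the input initializers, the per-output commits, and the group commit just above) now pass an executor as a third argument, which newer Guava releases require. GuavaShim.directExecutor() is presumably a version-bridging wrapper whose source is not in this excerpt, so the standalone sketch below uses Guava's own MoreExecutors.directExecutor(), under which the callback runs on whichever thread completes the future:

import java.util.concurrent.Executors;

import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

public class CallbackExecutorSketch {
  public static void main(String[] args) {
    ListeningExecutorService pool =
        MoreExecutors.listeningDecorator(Executors.newSingleThreadExecutor());
    ListenableFuture<String> commitFuture = pool.submit(() -> "commit done");
    // The three-argument overload: the callback plus the executor that runs it.
    Futures.addCallback(commitFuture, new FutureCallback<String>() {
      @Override public void onSuccess(String result) { System.out.println(result); }
      @Override public void onFailure(Throwable t) { t.printStackTrace(); }
    }, MoreExecutors.directExecutor());
    pool.shutdown();
  }
}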
- int priorityLowLimit = ((vertexDistanceFromRoot + 1) * dag.getTotalVertices() * 3) + (vertex.getVertexId().getId() * 3); - int priorityHighLimit = priorityLowLimit - 2; + int priorityLowLimit = getPriorityLowLimit(dag, vertex); + int priorityHighLimit = getPriorityHighLimit(dag, vertex); if (LOG.isDebugEnabled()) { - LOG.debug("Scheduling " + attempt.getID() + " between priorityLow: " + priorityLowLimit + LOG.debug("Scheduling " + attempt.getTaskAttemptID() + " between priorityLow: " + priorityLowLimit + " and priorityHigh: " + priorityHighLimit); } TaskAttemptEventSchedule attemptEvent = new TaskAttemptEventSchedule( - attempt.getID(), priorityLowLimit, priorityHighLimit); + attempt.getTaskAttemptID(), priorityLowLimit, priorityHighLimit); sendEvent(attemptEvent); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGSchedulerNaturalOrderControlled.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGSchedulerNaturalOrderControlled.java index 34cc92ff9b..3167c9f3b7 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGSchedulerNaturalOrderControlled.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGSchedulerNaturalOrderControlled.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -62,9 +62,9 @@ public class DAGSchedulerNaturalOrderControlled extends DAGScheduler { LinkedListMultimap.create(); // Tacks vertices for which no additional scheduling checks are required. Once in this list, the // vertex is considered to be fully scheduled. - private final Set scheduledVertices = new HashSet(); + private final Set scheduledVertices = new HashSet<>(); // Tracks tasks scheduled for a vertex. - private final Map vertexScheduledTasks = new HashMap(); + private final Map vertexScheduledTasks = new HashMap<>(); public DAGSchedulerNaturalOrderControlled(DAG dag, EventHandler dispatcher) { this.dag = dag; @@ -76,21 +76,20 @@ public DAGSchedulerNaturalOrderControlled(DAG dag, EventHandler dispatcher) { public void scheduleTaskEx(DAGEventSchedulerUpdate event) { TaskAttempt attempt = event.getAttempt(); Vertex vertex = dag.getVertex(attempt.getVertexID()); - int vertexDistanceFromRoot = vertex.getDistanceFromRoot(); // natural priority. Handles failures and retries. - int priorityLowLimit = ((vertexDistanceFromRoot + 1) * dag.getTotalVertices() * 3) + (vertex.getVertexId().getId() * 3); - int priorityHighLimit = priorityLowLimit - 2; + int priorityLowLimit = getPriorityLowLimit(dag, vertex); + int priorityHighLimit = getPriorityHighLimit(dag, vertex); TaskAttemptEventSchedule attemptEvent = new TaskAttemptEventSchedule( - attempt.getID(), priorityLowLimit, priorityHighLimit); + attempt.getTaskAttemptID(), priorityLowLimit, priorityHighLimit); - taskAttemptSeen(vertex.getName(), attempt.getID()); + taskAttemptSeen(vertex.getName(), attempt.getTaskAttemptID()); if (vertexAlreadyScheduled(vertex)) { // Vertex previously marked ready for scheduling. 
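Back in the DAGImpl hunk, addUsedContainer feeds two de-duplicating sets and updateCounters publishes their sizes, so a container or host reused by many task attempts is still counted once per DAG. The sketch below replays that bookkeeping with plain strings in place of ContainerId/NodeId (the ids are invented):

import java.util.HashSet;
import java.util.Set;

public class UsedContainerCounterSketch {
  public static void main(String[] args) {
    Set<String> containersUsed = new HashSet<>();
    Set<String> nodesUsed = new HashSet<>();

    String[][] launches = {
        {"container_1_000001", "host-a"},
        {"container_1_000001", "host-a"}, // container reused: not double counted
        {"container_1_000002", "host-a"}, // second container on the same host
    };
    for (String[] launch : launches) {
      containersUsed.add(launch[0]); // mirrors containersUsedByCurrentDAG
      nodesUsed.add(launch[1]);      // mirrors nodesUsedByCurrentDAG
    }

    System.out.println("TOTAL_CONTAINERS_USED = " + containersUsed.size()); // 2
    System.out.println("NODE_USED_COUNT = " + nodesUsed.size());           // 1
  }
}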
if (LOG.isDebugEnabled()) { - LOG.debug("Scheduling " + attempt.getID() + " between priorityLow: " + priorityLowLimit + LOG.debug("Scheduling " + attempt.getTaskAttemptID() + " between priorityLow: " + priorityLowLimit + " and priorityHigh: " + priorityHighLimit); } sendEvent(attemptEvent); @@ -155,8 +154,7 @@ private void processDownstreamVertices(Vertex vertex) { List newlyScheduledVertices = Lists.newLinkedList(); Map outputVertexEdgeMap = vertex.getOutputVertices(); for (Vertex destVertex : outputVertexEdgeMap.keySet()) { - if (vertexAlreadyScheduled(destVertex)) { // Nothing to do if already scheduled. - } else { + if (!vertexAlreadyScheduled(destVertex)) { if (LOG.isDebugEnabled()) { LOG.debug("Attempting to schedule vertex: " + destVertex.getLogIdentifier() + " due to upstream event from " + vertex.getLogIdentifier()); diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java index f78c9a5e23..08e1c19cee 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/Edge.java @@ -65,7 +65,7 @@ import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Maps; public class Edge { @@ -374,7 +374,7 @@ public void sendTezEventToSourceTasks(TezEvent tezEvent) throws AMUserCodeExcept if (!bufferEvents.get()) { switch (tezEvent.getEventType()) { case INPUT_READ_ERROR_EVENT: - InputReadErrorEvent event = (InputReadErrorEvent) tezEvent.getEvent(); + InputReadErrorEvent inputReadErrorEvent = (InputReadErrorEvent) tezEvent.getEvent(); TezTaskAttemptID destAttemptId = tezEvent.getSourceInfo() .getTaskAttemptID(); int destTaskIndex = destAttemptId.getTaskID().getId(); @@ -383,10 +383,10 @@ public void sendTezEventToSourceTasks(TezEvent tezEvent) throws AMUserCodeExcept try { if (onDemandRouting) { srcTaskIndex = ((EdgeManagerPluginOnDemand) edgeManager).routeInputErrorEventToSource( - destTaskIndex, event.getIndex()); + destTaskIndex, inputReadErrorEvent.getIndex()); } else { - srcTaskIndex = edgeManager.routeInputErrorEventToSource(event, - destTaskIndex, event.getIndex()); + srcTaskIndex = edgeManager.routeInputErrorEventToSource(inputReadErrorEvent, + destTaskIndex, inputReadErrorEvent.getIndex()); } Preconditions.checkArgument(srcTaskIndex >= 0, "SourceTaskIndex should not be negative," @@ -413,12 +413,11 @@ public void sendTezEventToSourceTasks(TezEvent tezEvent) throws AMUserCodeExcept " destIndex=" + destTaskIndex + " edgeManager=" + edgeManager.getClass().getName()); } - TezTaskID srcTaskId = srcTask.getTaskId(); - int taskAttemptIndex = event.getVersion(); + TezTaskID srcTaskId = srcTask.getTaskID(); + int srcTaskAttemptIndex = inputReadErrorEvent.getVersion(); TezTaskAttemptID srcTaskAttemptId = TezTaskAttemptID.getInstance(srcTaskId, - taskAttemptIndex); - sendEvent(new TaskAttemptEventOutputFailed(srcTaskAttemptId, - tezEvent, numConsumers)); + srcTaskAttemptIndex); + sendEvent(new TaskAttemptEventOutputFailed(srcTaskAttemptId, tezEvent, numConsumers)); break; default: throw new TezUncheckedException("Unhandled tez event type: " diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ImmediateStartVertexManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ImmediateStartVertexManager.java index 
50624ddcd5..fc77e9a04d 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ImmediateStartVertexManager.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ImmediateStartVertexManager.java @@ -18,7 +18,7 @@ package org.apache.tez.dag.app.dag.impl; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java index dd381801bb..8a6008a496 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManager.java @@ -28,12 +28,11 @@ import org.apache.tez.runtime.api.events.DataMovementEvent; import org.apache.tez.runtime.api.events.InputReadErrorEvent; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; public class OneToOneEdgeManager extends EdgeManagerPlugin { - final List destinationInputIndices = - Collections.unmodifiableList(Collections.singletonList(0)); + final List destinationInputIndices = Collections.singletonList(0); final AtomicBoolean stateChecked = new AtomicBoolean(false); public OneToOneEdgeManager(EdgeManagerPluginContext context) { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManagerOnDemand.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManagerOnDemand.java index 819735a8cd..464d87df59 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManagerOnDemand.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OneToOneEdgeManagerOnDemand.java @@ -30,12 +30,11 @@ import org.apache.tez.runtime.api.events.DataMovementEvent; import org.apache.tez.runtime.api.events.InputReadErrorEvent; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; public class OneToOneEdgeManagerOnDemand extends EdgeManagerPluginOnDemand { - final List destinationInputIndices = - Collections.unmodifiableList(Collections.singletonList(0)); + final List destinationInputIndices = Collections.singletonList(0); final AtomicBoolean stateChecked = new AtomicBoolean(false); final EventRouteMetadata commonRouteMeta = diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OutputCommitterContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OutputCommitterContextImpl.java index 6eae32d133..06be989b9e 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OutputCommitterContextImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/OutputCommitterContextImpl.java @@ -18,7 +18,7 @@ package org.apache.tez.dag.app.dag.impl; -import static com.google.common.base.Preconditions.checkNotNull; +import java.util.Objects; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.tez.dag.api.OutputCommitterDescriptor; @@ -34,24 +34,27 @@ public class OutputCommitterContextImpl implements OutputCommitterContext { private final String dagName; private final String vertexName; private final int vertexIdx; + private final int dagIdentifier; private final RootInputLeafOutput output; public OutputCommitterContextImpl(ApplicationId applicationId, - int dagAttemptNumber, - String dagName, - String vertexName, - RootInputLeafOutput output, - int vertexIdx) { - checkNotNull(applicationId, 
"applicationId is null"); - checkNotNull(dagName, "dagName is null"); - checkNotNull(vertexName, "vertexName is null"); - checkNotNull(output, "output is null"); + int dagAttemptNumber, + String dagName, + String vertexName, + int dagIdentifier, + int vertexIdx, + RootInputLeafOutput output) { + Objects.requireNonNull(applicationId, "applicationId is null"); + Objects.requireNonNull(dagName, "dagName is null"); + Objects.requireNonNull(vertexName, "vertexName is null"); + Objects.requireNonNull(output, "output is null"); this.applicationId = applicationId; this.dagAttemptNumber = dagAttemptNumber; this.dagName = dagName; this.vertexName = vertexName; this.output = output; this.vertexIdx = vertexIdx; + this.dagIdentifier = dagIdentifier; } @Override @@ -94,4 +97,9 @@ public int getVertexIndex() { return vertexIdx; } + @Override + public int getDagIdentifier() { + return dagIdentifier; + } + } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/RootInputVertexManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/RootInputVertexManager.java index 38eba0e34d..afe2606c1b 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/RootInputVertexManager.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/RootInputVertexManager.java @@ -49,7 +49,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import javax.annotation.Nullable; diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java index 4d373cacfe..3c6f46cfd5 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/ScatterGatherEdgeManager.java @@ -31,12 +31,12 @@ import org.apache.tez.runtime.api.events.DataMovementEvent; import org.apache.tez.runtime.api.events.InputReadErrorEvent; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; public class ScatterGatherEdgeManager extends EdgeManagerPluginOnDemand { - - private AtomicReference> commonRouteMeta = + + private AtomicReference> commonRouteMeta = new AtomicReference>(); private Object commonRouteMetaLock = new Object(); private int[][] sourceIndices; @@ -55,12 +55,12 @@ public void initialize() { public int getNumDestinationTaskPhysicalInputs(int destinationTaskIndex) { return getContext().getSourceVertexNumTasks(); } - + @Override public int getNumSourceTaskPhysicalOutputs(int sourceTaskIndex) { int physicalOutputs = getContext().getDestinationVertexNumTasks(); Preconditions.checkArgument(physicalOutputs >= 0, - "ScatteGather edge manager must have destination vertex task parallelism specified"); + "ScatterGather edge manager must have destination vertex task parallelism specified"); return physicalOutputs; } @@ -98,10 +98,10 @@ private void createIndices() { targetIndices[i] = new int[]{i}; } } - + @Override public void prepareForRouting() throws Exception { - createIndices(); + createIndices(); } @Override @@ -112,12 +112,12 @@ public EventRouteMetadata routeDataMovementEventToDestination( } return null; } - + @Override public @Nullable CompositeEventRouteMetadata routeCompositeDataMovementEventToDestination( int sourceTaskIndex, int destinationTaskIndex) throws Exception { - return 
CompositeEventRouteMetadata.create(1, targetIndices[sourceTaskIndex][0], + return CompositeEventRouteMetadata.create(1, targetIndices[sourceTaskIndex][0], sourceIndices[destinationTaskIndex][0]); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java index 3c8a9b55ff..13769db839 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java @@ -26,6 +26,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -40,11 +41,11 @@ import org.apache.tez.dag.app.rm.AMSchedulerEventTAStateUpdated; import org.apache.tez.runtime.api.TaskFailureType; import org.apache.tez.serviceplugins.api.TaskScheduler; +import org.apache.tez.util.StringInterner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; @@ -76,6 +77,7 @@ import org.apache.tez.dag.app.dag.TaskAttempt; import org.apache.tez.dag.app.dag.TaskAttemptStateInternal; import org.apache.tez.dag.app.dag.Vertex; +import org.apache.tez.dag.app.dag.Task; import org.apache.tez.dag.app.dag.event.DAGEvent; import org.apache.tez.dag.app.dag.event.DAGEventCounterUpdate; import org.apache.tez.dag.app.dag.event.DAGEventDiagnosticsUpdate; @@ -102,10 +104,7 @@ import org.apache.tez.dag.history.events.TaskAttemptFinishedEvent; import org.apache.tez.dag.history.events.TaskAttemptStartedEvent; import org.apache.tez.dag.records.TaskAttemptTerminationCause; -import org.apache.tez.dag.records.TezDAGID; import org.apache.tez.dag.records.TezTaskAttemptID; -import org.apache.tez.dag.records.TezTaskID; -import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.dag.recovery.records.RecoveryProtos.DataEventDependencyInfoProto; import org.apache.tez.runtime.api.events.InputFailedEvent; import org.apache.tez.runtime.api.events.InputReadErrorEvent; @@ -117,10 +116,12 @@ import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import static org.apache.tez.dag.app.dag.impl.TezContainer.NULL_TEZ_CONTAINER; + public class TaskAttemptImpl implements TaskAttempt, EventHandler { @@ -179,18 +180,19 @@ public static DataEventDependencyInfo fromProto(DataEventDependencyInfoProto pro private TaskAttemptRecoveryData recoveryData; private long launchTime = 0; private long finishTime = 0; + /** System.nanoTime for task launch time, if recorded in this JVM. */ + private Long launchTimeNs; + /** System.nanoTime for task finish time, if recorded in this JVM. */ + private Long finishTimeNs; + /** Whether the task was recovered from a prior AM; see getDurationNs. 
*/ + private boolean isRecoveredDuration; private String trackerName; private int httpPort; - // TODO Can these be replaced by the container object TEZ-1037 - private Container container; + TezContainer container = NULL_TEZ_CONTAINER; private long allocationTime; - private ContainerId containerId; - private NodeId containerNodeId; - private String nodeHttpAddress; - private String nodeRackName; - private final Vertex vertex; + private final Task task; private final TaskLocationHint locationHint; private final TaskSpec taskSpec; @@ -214,9 +216,6 @@ public static DataEventDependencyInfo fromProto(DataEventDependencyInfoProto pro Set taskRacks = new HashSet(); private Map uniquefailedOutputReports = Maps.newHashMap(); - private static double MAX_ALLOWED_OUTPUT_FAILURES_FRACTION; - private static int MAX_ALLOWED_OUTPUT_FAILURES; - private static int MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC; protected final boolean isRescheduled; private final Resource taskResource; @@ -533,10 +532,10 @@ public TaskAttemptImpl(TezTaskAttemptID attemptId, EventHandler eventHandler, TaskHeartbeatHandler taskHeartbeatHandler, AppContext appContext, boolean isRescheduled, Resource resource, ContainerContext containerContext, boolean leafVertex, - Vertex vertex, TaskLocationHint locationHint, TaskSpec taskSpec) { + Task task, TaskLocationHint locationHint, TaskSpec taskSpec) { this(attemptId, eventHandler, taskCommunicatorManagerInterface, conf, clock, taskHeartbeatHandler, appContext, isRescheduled, resource, containerContext, leafVertex, - vertex, locationHint, taskSpec, null); + task, locationHint, taskSpec, null); } @SuppressWarnings("rawtypes") @@ -545,21 +544,9 @@ public TaskAttemptImpl(TezTaskAttemptID attemptId, EventHandler eventHandler, TaskHeartbeatHandler taskHeartbeatHandler, AppContext appContext, boolean isRescheduled, Resource resource, ContainerContext containerContext, boolean leafVertex, - Vertex vertex, TaskLocationHint locationHint, TaskSpec taskSpec, + Task task, TaskLocationHint locationHint, TaskSpec taskSpec, TezTaskAttemptID schedulingCausalTA) { - // TODO: Move these configs over to Vertex.VertexConfig - MAX_ALLOWED_OUTPUT_FAILURES = conf.getInt(TezConfiguration - .TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES, TezConfiguration - .TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES_DEFAULT); - - MAX_ALLOWED_OUTPUT_FAILURES_FRACTION = conf.getDouble(TezConfiguration - .TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES_FRACTION, TezConfiguration - .TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES_FRACTION_DEFAULT); - - MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC = conf.getInt( - TezConfiguration.TEZ_AM_MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC, - TezConfiguration.TEZ_AM_MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC_DEFAULT); ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(); this.readLock = rwLock.readLock(); this.writeLock = rwLock.writeLock(); @@ -570,13 +557,12 @@ public TaskAttemptImpl(TezTaskAttemptID attemptId, EventHandler eventHandler, this.clock = clock; this.taskHeartbeatHandler = taskHeartbeatHandler; this.appContext = appContext; - this.vertex = vertex; + this.vertex = task.getVertex(); + this.task = task; this.locationHint = locationHint; this.taskSpec = taskSpec; this.creationCausalTA = schedulingCausalTA; this.creationTime = clock.getTime(); - //set last notified progress time to current time - this.lastNotifyProgressTimestamp = clock.getTime(); this.reportedStatus = new TaskAttemptStatus(this.attemptId); initTaskAttemptStatus(reportedStatus); @@ -595,25 +581,10 @@ public TaskAttemptImpl(TezTaskAttemptID attemptId, 
 EventHandler eventHandler,
   }
 
   @Override
-  public TezTaskAttemptID getID() {
+  public TezTaskAttemptID getTaskAttemptID() {
     return attemptId;
   }
 
-  @Override
-  public TezTaskID getTaskID() {
-    return attemptId.getTaskID();
-  }
-
-  @Override
-  public TezVertexID getVertexID() {
-    return attemptId.getTaskID().getVertexID();
-  }
-
-  @Override
-  public TezDAGID getDAGID() {
-    return getVertexID().getDAGId();
-  }
-
   public TezTaskAttemptID getSchedulingCausalTA() {
     return creationCausalTA;
   }
@@ -639,8 +610,8 @@ public TaskAttemptReport getReport() {
       result.setContainerId(this.getAssignedContainerID());
       result.setNodeManagerHost(trackerName);
       result.setNodeManagerHttpPort(httpPort);
-      if (this.containerNodeId != null) {
-        result.setNodeManagerPort(this.containerNodeId.getPort());
+      if (this.container.getNodeId() != null) {
+        result.setNodeManagerPort(this.container.getNodeId().getPort());
       }
       return result;
     } finally {
@@ -650,11 +621,9 @@ public TaskAttemptReport getReport() {
 
   @Override
   public List getDiagnostics() {
-    List result = new ArrayList();
     readLock.lock();
     try {
-      result.addAll(diagnostics);
-      return result;
+      return new ArrayList(diagnostics);
     } finally {
       readLock.unlock();
     }
@@ -679,6 +648,17 @@ public TezCounters getCounters() {
       readLock.unlock();
     }
   }
+
+  @VisibleForTesting
+  @Override
+  public void setCounters(TezCounters counters) {
+    writeLock.lock();
+    try {
+      reportedStatus.setCounters(counters);
+    } finally {
+      writeLock.unlock();
+    }
+  }
 
   TaskStatistics getStatistics() {
     return this.statistics;
@@ -728,7 +708,7 @@ public boolean isFinished() {
   public ContainerId getAssignedContainerID() {
     readLock.lock();
     try {
-      return containerId;
+      return container.getId();
     } finally {
       readLock.unlock();
     }
@@ -738,7 +718,7 @@ public ContainerId getAssignedContainerID() {
   public Container getAssignedContainer() {
     readLock.lock();
     try {
-      return container;
+      return container == NULL_TEZ_CONTAINER ? null : container;
     } finally {
       readLock.unlock();
     }
@@ -748,7 +728,7 @@ public Container getAssignedContainer() {
   public String getAssignedContainerMgrAddress() {
     readLock.lock();
     try {
-      return containerNodeId.toString();
+      return container.getNodeId().toString();
     } finally {
       readLock.unlock();
     }
@@ -758,7 +738,7 @@ public String getAssignedContainerMgrAddress() {
   public NodeId getNodeId() {
     readLock.lock();
     try {
-      return containerNodeId;
+      return container.getNodeId();
     } finally {
       readLock.unlock();
     }
@@ -770,7 +750,7 @@ public NodeId getNodeId() {
   public String getNodeHttpAddress() {
     readLock.lock();
     try {
-      return nodeHttpAddress;
+      return container.getNodeHttpAddress();
     } finally {
       readLock.unlock();
     }
@@ -783,7 +763,7 @@ public String getNodeHttpAddress() {
   public String getNodeRackName() {
     this.readLock.lock();
     try {
-      return this.nodeRackName;
+      return container.getRackName();
     } finally {
       this.readLock.unlock();
     }
@@ -799,6 +779,25 @@ public long getLaunchTime() {
     }
   }
+
+  /** @return task runtime duration in NS. */
+  public long getDurationNs() {
+    readLock.lock();
+    try {
+      if (isRecoveredDuration) {
+        // NS values are not mappable between JVMs (per documentation, at
+        // least), so just use the clock after recovery.
+        return TimeUnit.MILLISECONDS.toNanos(launchTime == 0 ? 0
+            : (finishTime == 0 ? clock.getTime() : finishTime) - launchTime);
+      } else {
+        long ft = (finishTimeNs == null ? System.nanoTime() : finishTimeNs);
+        return (launchTimeNs == null) ? 0 : (ft - launchTimeNs);
+      }
+    } finally {
+      readLock.unlock();
+    }
+  }
+
   public long getCreationTime() {
     readLock.lock();
     try {
@@ -845,7 +844,12 @@ public long getFinishTime() {
       readLock.unlock();
     }
   }
-
+
+  @Override
+  public Task getTask() {
+    return task;
+  }
+
   Vertex getVertex() {
     return vertex;
   }
@@ -867,12 +871,12 @@ public void handle(TaskAttemptEvent event) {
         LOG.error("Can't handle this event at current state for "
             + this.attemptId, e);
         eventHandler.handle(new DAGEventDiagnosticsUpdate(
-            this.attemptId.getTaskID().getVertexID().getDAGId(),
+            getDAGID(),
             "Invalid event " + event.getType() + " on TaskAttempt " + this.attemptId));
         eventHandler.handle(
             new DAGEvent(
-                this.attemptId.getTaskID().getVertexID().getDAGId(),
+                getDAGID(),
                 DAGEventType.INTERNAL_ERROR)
         );
       } catch (RuntimeException e) {
@@ -880,13 +884,13 @@ public void handle(TaskAttemptEvent event) {
             + " at current state " + oldState + " for " + this.attemptId, e);
         eventHandler.handle(new DAGEventDiagnosticsUpdate(
-            this.attemptId.getTaskID().getVertexID().getDAGId(),
+            getDAGID(),
             "Uncaught exception when handling event " + event.getType()
                 + " on TaskAttempt " + this.attemptId + " at state " + oldState
                 + ", error=" + e.getMessage()));
         eventHandler.handle(
             new DAGEvent(
-                this.attemptId.getTaskID().getVertexID().getDAGId(),
+                getDAGID(),
                 DAGEventType.INTERNAL_ERROR)
         );
       }
@@ -947,6 +951,8 @@ private void setFinishTime() {
     // set the finish time only if launch time is set
     if (launchTime != 0 && finishTime == 0) {
       finishTime = clock.getTime();
+      // The default clock is not safe for measuring durations.
+      finishTimeNs = System.nanoTime();
     }
   }
 
@@ -961,17 +967,24 @@ private static DAGEventCounterUpdate createDAGCounterUpdateEventTALaunched(
     return dagCounterEvent;
   }
 
-  private static DAGEventCounterUpdate createDAGCounterUpdateEventTAFinished(
+  @VisibleForTesting
+  static DAGEventCounterUpdate createDAGCounterUpdateEventTAFinished(
       TaskAttemptImpl taskAttempt, TaskAttemptState taState) {
     DAGEventCounterUpdate jce = new DAGEventCounterUpdate(taskAttempt.getDAGID());
+    long amSideWallClockTimeMs = TimeUnit.NANOSECONDS.toMillis(taskAttempt.getDurationNs());
+    jce.addCounterUpdate(DAGCounter.WALL_CLOCK_MILLIS, amSideWallClockTimeMs);
+
     if (taState == TaskAttemptState.FAILED) {
       jce.addCounterUpdate(DAGCounter.NUM_FAILED_TASKS, 1);
+      jce.addCounterUpdate(DAGCounter.DURATION_FAILED_TASKS_MILLIS, amSideWallClockTimeMs);
     } else if (taState == TaskAttemptState.KILLED) {
       jce.addCounterUpdate(DAGCounter.NUM_KILLED_TASKS, 1);
+      jce.addCounterUpdate(DAGCounter.DURATION_KILLED_TASKS_MILLIS, amSideWallClockTimeMs);
     } else if (taState == TaskAttemptState.SUCCEEDED ) {
       jce.addCounterUpdate(DAGCounter.NUM_SUCCEEDED_TASKS, 1);
+      jce.addCounterUpdate(DAGCounter.DURATION_SUCCEEDED_TASKS_MILLIS, amSideWallClockTimeMs);
     }
 
     return jce;
@@ -1049,6 +1062,14 @@ private JobHistoryEvent createTaskAttemptStartedEvent() {
   //    */
   //  }
 
+  /**
+   * Records the launch time of the task.
+ */ + private void setLaunchTime() { + launchTime = clock.getTime(); + launchTimeNs = System.nanoTime(); + } + private void updateProgressSplits() { // double newProgress = reportedStatus.progress; // newProgress = Math.max(Math.min(newProgress, 1.0D), 0.0D); @@ -1112,8 +1133,8 @@ protected void logJobHistoryAttemptStarted() { String completedLogsUrl = getCompletedLogsUrl(); TaskAttemptStartedEvent startEvt = new TaskAttemptStartedEvent( attemptId, getVertex().getName(), - launchTime, containerId, containerNodeId, - inProgressLogsUrl, completedLogsUrl, nodeHttpAddress); + launchTime, container.getId(), container.getNodeId(), + inProgressLogsUrl, completedLogsUrl, container.getNodeHttpAddress()); this.appContext.getHistoryHandler().handle( new DAGHistoryEvent(getDAGID(), startEvt)); } @@ -1121,7 +1142,7 @@ attemptId, getVertex().getName(), protected void logJobHistoryAttemptFinishedEvent(TaskAttemptStateInternal state) { Preconditions.checkArgument(recoveryData == null || recoveryData.getTaskAttemptFinishedEvent() == null, - "log TaskAttemptFinishedEvent again in recovery when there's already another TaskAtttemptFinishedEvent"); + "log TaskAttemptFinishedEvent again in recovery when there's already another TaskAttemptFinishedEvent"); if (getLaunchTime() == 0) return; TaskAttemptFinishedEvent finishEvt = new TaskAttemptFinishedEvent( @@ -1139,7 +1160,7 @@ protected void logJobHistoryAttemptUnsuccesfulCompletion( TaskAttemptState state, TaskFailureType taskFailureType) { Preconditions.checkArgument(recoveryData == null || recoveryData.getTaskAttemptFinishedEvent() == null, - "log TaskAttemptFinishedEvent again in recovery when there's already another TaskAtttemptFinishedEvent"); + "log TaskAttemptFinishedEvent again in recovery when there's already another TaskAttemptFinishedEvent"); if (state == TaskAttemptState.FAILED && taskFailureType == null) { throw new IllegalStateException("FAILED state must be accompanied by a FailureType"); } @@ -1150,8 +1171,8 @@ protected void logJobHistoryAttemptUnsuccesfulCompletion( String completedLogsUrl = null; if (finishTime <= 0) { finishTime = clock.getTime(); // comes here in case it was terminated before launch - unsuccessfulContainerId = containerId; - unsuccessfulContainerNodeId = containerNodeId; + unsuccessfulContainerId = container.getId(); + unsuccessfulContainerNodeId = container.getNodeId(); inProgressLogsUrl = getInProgressLogsUrl(); completedLogsUrl = getCompletedLogsUrl(); } @@ -1162,8 +1183,8 @@ attemptId, getVertex().getName(), getLaunchTime(), terminationCause, StringUtils.join( getDiagnostics(), LINE_SEPARATOR), getCounters(), lastDataEvents, - taGeneratedEvents, creationTime, creationCausalTA, allocationTime, - unsuccessfulContainerId, unsuccessfulContainerNodeId, inProgressLogsUrl, completedLogsUrl, nodeHttpAddress); + taGeneratedEvents, creationTime, creationCausalTA, allocationTime, unsuccessfulContainerId, + unsuccessfulContainerNodeId, inProgressLogsUrl, completedLogsUrl, container.getNodeHttpAddress()); // FIXME how do we store information regd completion events this.appContext.getHistoryHandler().handle( new DAGHistoryEvent(getDAGID(), finishEvt)); @@ -1175,9 +1196,9 @@ private String getInProgressLogsUrl() { TezConstants.getTezYarnServicePluginName()) || getVertex().getServicePluginInfo().getContainerLauncherName().equals( TezConstants.getTezUberServicePluginName())) { - if (containerId != null && nodeHttpAddress != null) { - final String containerIdStr = containerId.toString(); - inProgressLogsUrl = nodeHttpAddress + if 
(container.getId() != null && container.getNodeHttpAddress() != null) { + final String containerIdStr = container.getId().toString(); + inProgressLogsUrl = container.getNodeHttpAddress() + "/" + "node/containerlogs" + "/" + containerIdStr + "/" + this.appContext.getUser(); @@ -1185,7 +1206,7 @@ private String getInProgressLogsUrl() { } else { inProgressLogsUrl = appContext.getTaskCommunicatorManager().getInProgressLogsUrl( getVertex().getTaskCommunicatorIdentifier(), - attemptId, containerNodeId); + attemptId, container.getNodeId()); } return inProgressLogsUrl; } @@ -1196,15 +1217,16 @@ private String getCompletedLogsUrl() { TezConstants.getTezYarnServicePluginName()) || getVertex().getServicePluginInfo().getContainerLauncherName().equals( TezConstants.getTezUberServicePluginName())) { - if (containerId != null && containerNodeId != null && nodeHttpAddress != null) { - final String containerIdStr = containerId.toString(); + if (container.getId() != null && container.getNodeId() != null && + container.getNodeHttpAddress() != null) { + final String containerIdStr = container.getId().toString(); if (conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLED) && conf.get(YarnConfiguration.YARN_LOG_SERVER_URL) != null) { String contextStr = "v_" + getVertex().getName() + "_" + this.attemptId.toString(); completedLogsUrl = conf.get(YarnConfiguration.YARN_LOG_SERVER_URL) - + "/" + containerNodeId.toString() + + "/" + container.getNodeId().toString() + "/" + containerIdStr + "/" + contextStr + "/" + this.appContext.getUser(); @@ -1213,7 +1235,7 @@ private String getCompletedLogsUrl() { } else { completedLogsUrl = appContext.getTaskCommunicatorManager().getCompletedLogsUrl( getVertex().getTaskCommunicatorIdentifier(), - attemptId, containerNodeId); + attemptId, container.getNodeId()); } return completedLogsUrl; } @@ -1232,14 +1254,13 @@ public TaskAttemptStateInternal transition(TaskAttemptImpl ta, TaskAttemptEvent ta.recoveryData.getTaskAttemptStartedEvent(); if (taStartedEvent != null) { ta.launchTime = taStartedEvent.getStartTime(); + ta.isRecoveredDuration = true; TaskAttemptFinishedEvent taFinishedEvent = ta.recoveryData.getTaskAttemptFinishedEvent(); if (taFinishedEvent == null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Only TaskAttemptStartedEvent but no TaskAttemptFinishedEvent, " - + "send out TaskAttemptEventAttemptKilled to move it to KILLED"); - } - ta.sendEvent(new TaskAttemptEventAttemptKilled(ta.getID(), + LOG.debug("Only TaskAttemptStartedEvent but no TaskAttemptFinishedEvent, " + + "send out TaskAttemptEventAttemptKilled to move it to KILLED"); + ta.sendEvent(new TaskAttemptEventAttemptKilled(ta.getTaskAttemptID(), "Task Attempt killed in recovery due to can't recover the running task attempt", TaskAttemptTerminationCause.TERMINATED_AT_RECOVERY, true)); return TaskAttemptStateInternal.NEW; @@ -1250,38 +1271,29 @@ public TaskAttemptStateInternal transition(TaskAttemptImpl ta, TaskAttemptEvent TaskAttemptFinishedEvent taFinishedEvent = ta.recoveryData.getTaskAttemptFinishedEvent(); Preconditions.checkArgument(taFinishedEvent != null, "Both of TaskAttemptStartedEvent and TaskFinishedEvent is null," - + "taskAttemptId=" + ta.getID()); + + "taskAttemptId=" + ta.getTaskAttemptID()); switch (taFinishedEvent.getState()) { case FAILED: - if (LOG.isDebugEnabled()) { - LOG.debug("TaskAttemptFinishedEvent is seen with state of FAILED" - + ", send TA_FAILED to itself" - + ", attemptId=" + ta.attemptId); - } - ta.sendEvent(new 
TaskAttemptEventAttemptFailed(ta.getID(), TaskAttemptEventType.TA_FAILED, + LOG.debug("TaskAttemptFinishedEvent is seen with state of FAILED, " + + "send TA_FAILED to itself, attemptId={}", ta.attemptId); + ta.sendEvent(new TaskAttemptEventAttemptFailed(ta.getTaskAttemptID(), TaskAttemptEventType.TA_FAILED, taFinishedEvent.getTaskFailureType(), taFinishedEvent.getDiagnostics(), taFinishedEvent.getTaskAttemptError(), true)); break; case KILLED: - if (LOG.isDebugEnabled()) { - LOG.debug("TaskAttemptFinishedEvent is seen with state of KILLED" - + ", send TA_KILLED to itself" - + ", attemptId=" + ta.attemptId); - } - ta.sendEvent(new TaskAttemptEventAttemptKilled(ta.getID(), + LOG.debug("TaskAttemptFinishedEvent is seen with state of KILLED, " + + "send TA_KILLED to itself, attemptId={}", ta.attemptId); + ta.sendEvent(new TaskAttemptEventAttemptKilled(ta.getTaskAttemptID(), taFinishedEvent.getDiagnostics(), taFinishedEvent.getTaskAttemptError(), true)); break; case SUCCEEDED: - if (LOG.isDebugEnabled()) { - LOG.debug("TaskAttemptFinishedEvent is seen with state of SUCCEEDED" - + ", send TA_DONE to itself" - + ", attemptId=" + ta.attemptId); - } - ta.sendEvent(new TaskAttemptEvent(ta.getID(), TaskAttemptEventType.TA_DONE)); + LOG.debug("TaskAttemptFinishedEvent is seen with state of SUCCEEDED, " + + "send TA_DONE to itself, attemptId={}", ta.attemptId); + ta.sendEvent(new TaskAttemptEvent(ta.getTaskAttemptID(), TaskAttemptEventType.TA_DONE)); break; default: throw new TezUncheckedException("Invalid state in TaskAttemptFinishedEvent, state=" - + taFinishedEvent.getState() + ", taId=" + ta.getID()); + + taFinishedEvent.getState() + ", taId=" + ta.getTaskAttemptID()); } return TaskAttemptStateInternal.NEW; } @@ -1314,7 +1326,7 @@ public TaskAttemptStateInternal transition(TaskAttemptImpl ta, TaskAttemptEvent ta.taskRacks = racks; // Ask for hosts / racks only if not a re-scheduled task. 
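
A note on the logging changes in the recovery transitions above: the patch repeatedly replaces guarded string-concatenation logging with SLF4J parameterized messages. A minimal sketch of why the explicit LOG.isDebugEnabled() guard becomes unnecessary once {} placeholders are used (illustrative only, not part of the patch):

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    class ParameterizedLoggingSketch {
      private static final Logger LOG = LoggerFactory.getLogger(ParameterizedLoggingSketch.class);

      void onRecovered(Object attemptId) {
        // Message formatting is deferred until DEBUG is known to be enabled;
        // a guard is only worthwhile when computing an argument is expensive.
        LOG.debug("TaskAttemptFinishedEvent is seen with state of SUCCEEDED, "
            + "send TA_DONE to itself, attemptId={}", attemptId);
      }
    }
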
- if (ta.isRescheduled && ta.getVertex().getVertexConfig().getTaskRescheduleHigherPriority()) { + if (ta.isRescheduled && ta.getVertex().getVertexConfig().getTaskRescheduleRelaxedLocality()) { locationHint = null; } @@ -1372,13 +1384,10 @@ public void transition(TaskAttemptImpl ta, TaskAttemptEvent event) { if (event instanceof TaskAttemptEventContainerTerminated) { TaskAttemptEventContainerTerminated tEvent = (TaskAttemptEventContainerTerminated) event; AMContainer amContainer = ta.appContext.getAllContainers().get(tEvent.getContainerId()); - Container container = amContainer.getContainer(); + TezContainer container = new TezContainer(amContainer.getContainer()); ta.allocationTime = amContainer.getCurrentTaskAttemptAllocationTime(); ta.container = container; - ta.containerId = tEvent.getContainerId(); - ta.containerNodeId = container.getNodeId(); - ta.nodeHttpAddress = StringInterner.weakIntern(container.getNodeHttpAddress()); } if (event instanceof TaskAttemptEventContainerTerminatedBySystem) { @@ -1387,10 +1396,7 @@ public void transition(TaskAttemptImpl ta, TaskAttemptEvent event) { Container container = amContainer.getContainer(); ta.allocationTime = amContainer.getCurrentTaskAttemptAllocationTime(); - ta.container = container; - ta.containerId = tEvent.getContainerId(); - ta.containerNodeId = container.getNodeId(); - ta.nodeHttpAddress = StringInterner.weakIntern(container.getNodeHttpAddress()); + ta.container = new TezContainer(container); } if (ta.recoveryData == null || @@ -1400,20 +1406,21 @@ public void transition(TaskAttemptImpl ta, TaskAttemptEvent event) { .getTaskAttemptState(), helper.getFailureType(event)); } else { ta.finishTime = ta.recoveryData.getTaskAttemptFinishedEvent().getFinishTime(); + ta.isRecoveredDuration = true; } if (event instanceof RecoveryEvent) { RecoveryEvent rEvent = (RecoveryEvent)event; if (rEvent.isFromRecovery()) { if (LOG.isDebugEnabled()) { - LOG.debug("Faked TerminateEvent from recovery, taskAttemptId=" + ta.getID()); + LOG.debug("Faked TerminateEvent from recovery, taskAttemptId=" + ta.getTaskAttemptID()); } } } ta.sendEvent(createDAGCounterUpdateEventTAFinished(ta, helper.getTaskAttemptState())); // Send out events to the Task - indicating TaskAttemptTermination(F/K) - ta.sendEvent(helper.getTaskEvent(ta.attemptId, event)); + ta.sendEvent(helper.getTaskEvent(ta.attemptId, event)); } } @@ -1425,28 +1432,23 @@ public void transition(TaskAttemptImpl ta, TaskAttemptEvent origEvent) { TaskAttemptEventSubmitted event = (TaskAttemptEventSubmitted) origEvent; AMContainer amContainer = ta.appContext.getAllContainers().get(event.getContainerId()); - Container container = amContainer.getContainer(); + TezContainer container = new TezContainer(amContainer.getContainer()); ta.allocationTime = amContainer.getCurrentTaskAttemptAllocationTime(); - ta.container = container; - ta.containerId = event.getContainerId(); - ta.containerNodeId = container.getNodeId(); - ta.nodeHttpAddress = StringInterner.weakIntern(container.getNodeHttpAddress()); - ta.nodeRackName = StringInterner.weakIntern(RackResolver.resolve(ta.containerNodeId.getHost()) - .getNetworkLocation()); + ta.container = new TezContainer(container); + ta.lastNotifyProgressTimestamp = ta.clock.getTime(); - ta.launchTime = ta.clock.getTime(); + ta.setLaunchTime(); // TODO Resolve to host / IP in case of a local address. - InetSocketAddress nodeHttpInetAddr = NetUtils - .createSocketAddr(ta.nodeHttpAddress); // TODO: Costly? 
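
The container bookkeeping above replaces four independently nullable fields (container, containerId, containerNodeId, nodeHttpAddress) with a single TezContainer that defaults to NULL_TEZ_CONTAINER. A hedged sketch of the null-object idiom this relies on, using hypothetical simplified names rather than the real TezContainer:

    // Hypothetical mini null-object container; the real TezContainer wraps a YARN Container.
    final class ContainerInfo {
      static final ContainerInfo NULL = new ContainerInfo(null, null);

      private final String nodeId;
      private final String nodeHttpAddress;

      ContainerInfo(String nodeId, String nodeHttpAddress) {
        this.nodeId = nodeId;
        this.nodeHttpAddress = nodeHttpAddress;
      }

      String getNodeId() { return nodeId; }
      String getNodeHttpAddress() { return nodeHttpAddress; }
    }

    class AttemptSketch {
      private ContainerInfo container = ContainerInfo.NULL; // never null itself

      // Only "was a container assigned?" needs an identity check, mirroring
      // getAssignedContainer() in the hunk above; all other accessors can
      // dereference container unconditionally.
      ContainerInfo getAssignedContainer() {
        return container == ContainerInfo.NULL ? null : container;
      }
    }
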
- ta.trackerName = StringInterner.weakIntern(nodeHttpInetAddr.getHostName()); + InetSocketAddress nodeHttpInetAddr = NetUtils.createSocketAddr(container.getNodeHttpAddress()); // TODO: Costly? + ta.trackerName = StringInterner.intern(nodeHttpInetAddr.getHostName()); ta.httpPort = nodeHttpInetAddr.getPort(); ta.sendEvent(createDAGCounterUpdateEventTALaunched(ta)); LOG.info("TaskAttempt: [" + ta.attemptId + "] submitted." - + " Is using containerId: [" + ta.containerId + "]" + " on NM: [" - + ta.containerNodeId + "]"); + + " Is using containerId: [" + ta.container.getId() + "]" + " on NM: [" + + ta.container.getNodeId() + "]"); // JobHistoryEvent. // The started event represents when the attempt was submitted to the executor. @@ -1454,9 +1456,9 @@ public void transition(TaskAttemptImpl ta, TaskAttemptEvent origEvent) { // TODO Remove after HDFS-5098 // Compute LOCALITY counter for this task. - if (ta.taskHosts.contains(ta.containerNodeId.getHost())) { + if (ta.taskHosts.contains(ta.container.getNodeId().getHost())) { ta.localityCounter = DAGCounter.DATA_LOCAL_TASKS; - } else if (ta.taskRacks.contains(ta.nodeRackName)) { + } else if (ta.taskRacks.contains(container.getRackName())) { ta.localityCounter = DAGCounter.RACK_LOCAL_TASKS; } else { // Not computing this if the task does not have locality information. @@ -1515,9 +1517,9 @@ public void transition(TaskAttemptImpl ta, TaskAttemptEvent event) { super.transition(ta, event); // Inform the scheduler if (sendSchedulerEvent()) { - ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.containerId, helper - .getTaskAttemptState(), TezUtilsInternal.toTaskAttemptEndReason(ta.terminationCause), - ta instanceof DiagnosableEvent ? ((DiagnosableEvent)ta).getDiagnosticInfo() : null, + ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.container.getId(), helper.getTaskAttemptState(), + TezUtilsInternal.toTaskAttemptEndReason(ta.terminationCause), + ta instanceof DiagnosableEvent ? 
((DiagnosableEvent) ta).getDiagnosticInfo() : null, ta.getVertex().getTaskSchedulerIdentifier())); } } @@ -1575,21 +1577,25 @@ public void transition(TaskAttemptImpl ta, TaskAttemptEvent event) { TaskStatusUpdateEvent statusEvent = sEvent.getStatusEvent(); ta.reportedStatus.state = ta.getState(); ta.reportedStatus.progress = statusEvent.getProgress(); - ta.reportedStatus.counters = statusEvent.getCounters(); - ta.statistics = statusEvent.getStatistics(); + if (statusEvent.getCounters() != null) { + ta.reportedStatus.counters = statusEvent.getCounters(); + } + if (statusEvent.getStatistics() != null) { + ta.statistics = statusEvent.getStatistics(); + } if (statusEvent.getProgressNotified()) { ta.lastNotifyProgressTimestamp = ta.clock.getTime(); } else { long currTime = ta.clock.getTime(); - if (ta.hungIntervalMax > 0 && + if (ta.hungIntervalMax > 0 && ta.lastNotifyProgressTimestamp > 0 && currTime - ta.lastNotifyProgressTimestamp > ta.hungIntervalMax) { // task is hung String diagnostics = "Attempt failed because it appears to make no progress for " + ta.hungIntervalMax + "ms"; - LOG.info(diagnostics + " " + ta.getID()); + LOG.info(diagnostics + " " + ta.getTaskAttemptID()); // send event that will fail this attempt ta.sendEvent( - new TaskAttemptEventAttemptFailed(ta.getID(), + new TaskAttemptEventAttemptFailed(ta.getTaskAttemptID(), TaskAttemptEventType.TA_FAILED, TaskFailureType.NON_FATAL, diagnostics, @@ -1628,20 +1634,19 @@ protected static class SucceededTransition implements public void transition(TaskAttemptImpl ta, TaskAttemptEvent event) { // If TaskAttempt is recovered to SUCCEEDED, send events generated by this TaskAttempt to vertex - // for its downstream consumers. For normal dag execution, the events are sent by TaskAttmeptListener + // for its downstream consumers. For normal dag execution, the events are sent by TaskAttemptListener // for performance consideration. if (ta.recoveryData != null && ta.recoveryData.isTaskAttemptSucceeded()) { TaskAttemptFinishedEvent taFinishedEvent = ta.recoveryData .getTaskAttemptFinishedEvent(); - if (LOG.isDebugEnabled()) { - LOG.debug("TaskAttempt is recovered to SUCCEEDED, attemptId=" + ta.attemptId); - } + LOG.debug("TaskAttempt is recovered to SUCCEEDED, attemptId={}", ta.attemptId); ta.reportedStatus.counters = taFinishedEvent.getCounters(); List tezEvents = taFinishedEvent.getTAGeneratedEvents(); if (tezEvents != null && !tezEvents.isEmpty()) { ta.sendEvent(new VertexEventRouteEvent(ta.getVertexID(), tezEvents)); } ta.finishTime = taFinishedEvent.getFinishTime(); + ta.isRecoveredDuration = true; } else { ta.setFinishTime(); // Send out history event. @@ -1652,8 +1657,8 @@ public void transition(TaskAttemptImpl ta, TaskAttemptEvent event) { TaskAttemptState.SUCCEEDED)); // Inform the Scheduler. - ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.containerId, - TaskAttemptState.SUCCEEDED, null, null, ta.getVertex().getTaskSchedulerIdentifier())); + ta.sendEvent(new AMSchedulerEventTAEnded(ta, ta.container.getId(), TaskAttemptState.SUCCEEDED, null, null, + ta.getVertex().getTaskSchedulerIdentifier())); // Inform the task. 
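
The status-update hunk above also tightens the hung-attempt detection by requiring lastNotifyProgressTimestamp > 0 before comparing against hungIntervalMax. A condensed restatement of that predicate (names simplified; the real values come from the task attempt and its configuration):

    final class HungAttemptCheck {
      // Returns true when the attempt should be failed as hung.
      static boolean isHung(long nowMs, long lastNotifyProgressMs, long hungIntervalMaxMs) {
        // A non-positive interval disables the check; a zero timestamp means
        // progress was never notified, so the attempt is not judged yet.
        return hungIntervalMaxMs > 0
            && lastNotifyProgressMs > 0
            && nowMs - lastNotifyProgressMs > hungIntervalMaxMs;
      }
    }
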
ta.sendEvent(new TaskEventTASucceeded(ta.attemptId)); @@ -1767,71 +1772,130 @@ protected static class OutputReportedFailedTransition implements MultipleArcTransition { @Override - public TaskAttemptStateInternal transition(TaskAttemptImpl attempt, + public TaskAttemptStateInternal transition(TaskAttemptImpl sourceAttempt, TaskAttemptEvent event) { TaskAttemptEventOutputFailed outputFailedEvent = (TaskAttemptEventOutputFailed) event; - TezEvent tezEvent = outputFailedEvent.getInputFailedEvent(); - TezTaskAttemptID failedDestTaId = tezEvent.getSourceInfo().getTaskAttemptID(); - InputReadErrorEvent readErrorEvent = (InputReadErrorEvent)tezEvent.getEvent(); + TezEvent inputFailedEvent = outputFailedEvent.getInputFailedEvent(); + TezTaskAttemptID failedDestTaId = inputFailedEvent.getSourceInfo().getTaskAttemptID(); + + InputReadErrorEvent readErrorEvent = (InputReadErrorEvent)inputFailedEvent.getEvent(); int failedInputIndexOnDestTa = readErrorEvent.getIndex(); - if (readErrorEvent.getVersion() != attempt.getID().getId()) { - throw new TezUncheckedException(attempt.getID() + + if (readErrorEvent.getVersion() != sourceAttempt.getTaskAttemptID().getId()) { + throw new TezUncheckedException(sourceAttempt.getTaskAttemptID() + " incorrectly blamed for read error from " + failedDestTaId + " at inputIndex " + failedInputIndexOnDestTa + " version" + readErrorEvent.getVersion()); } - LOG.info(attempt.getID() - + " blamed for read error from " + failedDestTaId - + " at inputIndex " + failedInputIndexOnDestTa); - long time = attempt.clock.getTime(); - Long firstErrReportTime = attempt.uniquefailedOutputReports.get(failedDestTaId); + // source host: where the data input is supposed to come from + String sHost = sourceAttempt.getNodeId().getHost(); + // destination: where the data is tried to be fetched to + String dHost = readErrorEvent.getDestinationLocalhostName(); + + LOG.info("{} (on {}) blamed for read error from {} (on {}) at inputIndex {}", sourceAttempt.getTaskAttemptID(), + sHost, failedDestTaId, dHost, failedInputIndexOnDestTa); + + boolean tooManyDownstreamHostsBlamedTheSameUpstreamHost = false; + Map> downstreamBlamingHosts = sourceAttempt.getVertex().getDownstreamBlamingHosts(); + if (!downstreamBlamingHosts.containsKey(sHost)) { + LOG.info("Host {} is blamed for fetch failure from {} for the first time", sHost, dHost); + downstreamBlamingHosts.put(sHost, new HashSet()); + } + + downstreamBlamingHosts.get(sHost).add(dHost); + int currentNumberOfFailingDownstreamHosts = downstreamBlamingHosts.get(sHost).size(); + int numNodes = getNumNodes(sourceAttempt); + float hostFailureFraction = numNodes > 0 ? 
((float) currentNumberOfFailingDownstreamHosts) / numNodes : 0; + double maxAllowedHostFailureFraction = sourceAttempt.getVertex().getVertexConfig() + .getMaxAllowedDownstreamHostFailuresFraction(); + + if (hostFailureFraction > maxAllowedHostFailureFraction) { + LOG.info("Host will be marked fail: {} because of host failure fraction {} is beyond the limit {}", sHost, + hostFailureFraction, maxAllowedHostFailureFraction); + tooManyDownstreamHostsBlamedTheSameUpstreamHost = true; + } + + long time = sourceAttempt.clock.getTime(); + + Long firstErrReportTime = sourceAttempt.uniquefailedOutputReports.get(failedDestTaId); if (firstErrReportTime == null) { - attempt.uniquefailedOutputReports.put(failedDestTaId, time); + sourceAttempt.uniquefailedOutputReports.put(failedDestTaId, time); firstErrReportTime = time; } - - int readErrorTimespanSec = (int)((time - firstErrReportTime)/1000); - boolean crossTimeDeadline = readErrorTimespanSec >= MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC; - float failureFraction = ((float) attempt.uniquefailedOutputReports.size()) - / outputFailedEvent.getConsumerTaskNumber(); + int maxAllowedOutputFailures = sourceAttempt.getVertex().getVertexConfig() + .getMaxAllowedOutputFailures(); + int maxAllowedTimeForTaskReadErrorSec = sourceAttempt.getVertex() + .getVertexConfig().getMaxAllowedTimeForTaskReadErrorSec(); + double maxAllowedOutputFailuresFraction = sourceAttempt.getVertex() + .getVertexConfig().getMaxAllowedOutputFailuresFraction(); + + int readErrorTimespanSec = (int)((time - firstErrReportTime)/1000); + boolean crossTimeDeadline = readErrorTimespanSec >= maxAllowedTimeForTaskReadErrorSec; + int runningTasks = sourceAttempt.appContext.getCurrentDAG().getVertex( + failedDestTaId.getVertexID()).getRunningTasks(); + float failureFraction = + runningTasks > 0 ? ((float) sourceAttempt.uniquefailedOutputReports.size()) / runningTasks : 0; boolean withinFailureFractionLimits = - (failureFraction <= MAX_ALLOWED_OUTPUT_FAILURES_FRACTION); + (failureFraction <= maxAllowedOutputFailuresFraction); boolean withinOutputFailureLimits = - (attempt.uniquefailedOutputReports.size() < MAX_ALLOWED_OUTPUT_FAILURES); + (sourceAttempt.uniquefailedOutputReports.size() < maxAllowedOutputFailures); // If needed we can launch a background task without failing this task // to generate a copy of the output just in case. // If needed we can consider only running consumer tasks - if (!crossTimeDeadline && withinFailureFractionLimits && withinOutputFailureLimits) { - return attempt.getInternalState(); + if (!crossTimeDeadline && withinFailureFractionLimits && withinOutputFailureLimits + && !(readErrorEvent.isLocalFetch() || readErrorEvent.isDiskErrorAtSource()) + && !tooManyDownstreamHostsBlamedTheSameUpstreamHost) { + return sourceAttempt.getInternalState(); } - String message = attempt.getID() + " being failed for too many output errors. " + String message = sourceAttempt.getTaskAttemptID() + " being failed for too many output errors. 
" + "failureFraction=" + failureFraction - + ", MAX_ALLOWED_OUTPUT_FAILURES_FRACTION=" + MAX_ALLOWED_OUTPUT_FAILURES_FRACTION - + ", uniquefailedOutputReports=" + attempt.uniquefailedOutputReports.size() - + ", MAX_ALLOWED_OUTPUT_FAILURES=" + MAX_ALLOWED_OUTPUT_FAILURES - + ", MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC=" + MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC - + ", readErrorTimespan=" + readErrorTimespanSec; + + ", MAX_ALLOWED_OUTPUT_FAILURES_FRACTION=" + + maxAllowedOutputFailuresFraction + + ", uniquefailedOutputReports=" + sourceAttempt.uniquefailedOutputReports.size() + + ", MAX_ALLOWED_OUTPUT_FAILURES=" + maxAllowedOutputFailures + + ", hostFailureFraction=" + hostFailureFraction + + " (" + currentNumberOfFailingDownstreamHosts + " / " + numNodes + ")" + + ", MAX_ALLOWED_DOWNSTREAM_HOST_FAILURES_FRACTION=" + + maxAllowedHostFailureFraction + + ", MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC=" + + maxAllowedTimeForTaskReadErrorSec + + ", readErrorTimespan=" + readErrorTimespanSec + + ", isLocalFetch=" + readErrorEvent.isLocalFetch() + + ", isDiskErrorAtSource=" + readErrorEvent.isDiskErrorAtSource(); + LOG.info(message); - attempt.addDiagnosticInfo(message); + sourceAttempt.addDiagnosticInfo(message); // send input failed event - attempt.sendInputFailedToConsumers(); + sourceAttempt.sendInputFailedToConsumers(); // Not checking for leafVertex since a READ_ERROR should only be reported for intermediate tasks. - if (attempt.getInternalState() == TaskAttemptStateInternal.SUCCEEDED) { + if (sourceAttempt.getInternalState() == TaskAttemptStateInternal.SUCCEEDED) { (new TerminatedAfterSuccessHelper(FAILED_HELPER)).transition( - attempt, event); + sourceAttempt, event); return TaskAttemptStateInternal.FAILED; } else { (new TerminatedWhileRunningTransition(FAILED_HELPER)).transition( - attempt, event); + sourceAttempt, event); return TaskAttemptStateInternal.FAIL_IN_PROGRESS; } // TODO at some point. Nodes may be interested in FetchFailure info. // Can be used to blacklist nodes. 
} + + private int getNumNodes(TaskAttemptImpl sourceAttempt) { + Vertex vertex = sourceAttempt.getVertex(); + String taskSchedulerName = vertex.getServicePluginInfo().getTaskSchedulerName(); + int sourceIndex = vertex.getAppContext().getTaskScheduerIdentifier(taskSchedulerName); + int numActiveNodes = vertex.getAppContext().getNodeTracker().getNumActiveNodes(sourceIndex); + if (LOG.isDebugEnabled()) { + int numAllNodes = vertex.getAppContext().getNodeTracker().getNumNodes(sourceIndex); + LOG.debug("Getting nodes, active/all: {}/{}", numActiveNodes, numAllNodes); + } + return numActiveNodes; + } } @VisibleForTesting @@ -1844,8 +1908,8 @@ protected void sendInputFailedToConsumers() { tezIfEvents.add(new TezEvent(new InputFailedEvent(), new EventMetaData(EventProducerConsumerType.SYSTEM, vertex.getName(), - edgeVertex.getName(), - getID()), appContext.getClock().getTime())); + edgeVertex.getName(), + getTaskAttemptID()), appContext.getClock().getTime())); } sendEvent(new VertexEventRouteEvent(vertex.getVertexId(), tezIfEvents)); } @@ -1939,7 +2003,7 @@ public TaskFailureType getFailureType(TaskAttemptEvent event) { @Override public String toString() { - return getID().toString(); + return getTaskAttemptID().toString(); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImplHelpers.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImplHelpers.java index 9e4f2b4a6e..fba82613dd 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImplHelpers.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImplHelpers.java @@ -25,9 +25,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class TaskAttemptImplHelpers { +public final class TaskAttemptImplHelpers { private static final Logger LOG = LoggerFactory.getLogger(TaskAttemptImplHelpers.class); + + private TaskAttemptImplHelpers() {} static String[] resolveHosts(String[] src) { String[] result = new String[src.length]; diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java index bed41415fb..73856f0407 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskImpl.java @@ -26,11 +26,13 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Maps; @@ -38,11 +40,16 @@ import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.tez.dag.app.dag.event.TaskEventTAFailed; +import org.apache.tez.dag.records.TaskAttemptTerminationCause; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.dag.records.TezTaskID; +import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.runtime.api.TaskFailureType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; import 
org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.state.InvalidStateTransitonException; @@ -75,8 +82,10 @@ import org.apache.tez.dag.app.dag.event.TaskAttemptEventAttemptKilled; import org.apache.tez.dag.app.dag.event.TaskAttemptEventKillRequest; import org.apache.tez.dag.app.dag.event.TaskAttemptEventOutputFailed; +import org.apache.tez.dag.app.dag.event.TaskAttemptEventTerminationCauseEvent; import org.apache.tez.dag.app.dag.event.TaskEvent; import org.apache.tez.dag.app.dag.event.TaskEventScheduleTask; +import org.apache.tez.dag.app.dag.event.TaskEventTAKilled; import org.apache.tez.dag.app.dag.event.TaskEventTAUpdate; import org.apache.tez.dag.app.dag.event.TaskEventTermination; import org.apache.tez.dag.app.dag.event.TaskEventType; @@ -88,10 +97,6 @@ import org.apache.tez.dag.history.DAGHistoryEvent; import org.apache.tez.dag.history.events.TaskFinishedEvent; import org.apache.tez.dag.history.events.TaskStartedEvent; -import org.apache.tez.dag.records.TaskAttemptTerminationCause; -import org.apache.tez.dag.records.TezTaskAttemptID; -import org.apache.tez.dag.records.TezTaskID; -import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.dag.utils.TezBuilderUtils; import org.apache.tez.runtime.api.OutputCommitter; import org.apache.tez.runtime.api.impl.TaskSpec; @@ -121,6 +126,7 @@ public class TaskImpl implements Task, EventHandler { private final TezTaskID taskId; private Map attempts; protected final int maxFailedAttempts; + protected final int maxAttempts; //overall max number of attempts (consider preempted task attempts) protected final Clock clock; private final Vertex vertex; private final Lock readLock; @@ -147,7 +153,14 @@ public class TaskImpl implements Task, EventHandler { // track the status of TaskAttempt (true mean completed, false mean uncompleted) private final Map taskAttemptStatus = new HashMap(); - private static final SingleArcTransition + // The set of nodes with active running attempts at the time of the latest attempt for + // this task was scheduled. This set is empty when scheduling original task attempt, and + // non-empty scheduling a speculative attempt, in which case scheduler should avoid + // scheduling the speculative attempt onto node(s) recorded in this set. 
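
The field declared just below uses the pre-Java-8 idiom for a concurrent hash set. A self-contained sketch of the same construction (String stands in for NodeId, and the sample value is hypothetical, to keep the example runnable on its own):

    import java.util.Collections;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;

    final class ConcurrentSetSketch {
      public static void main(String[] args) {
        // Equivalent in effect to ConcurrentHashMap.newKeySet() on Java 8+.
        Set<String> nodes =
            Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
        nodes.add("node-1:8041"); // safe to call from multiple dispatcher threads
        System.out.println(nodes.contains("node-1:8041")); // prints: true
      }
    }
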
+ private final Set nodesWithRunningAttempts = Collections + .newSetFromMap(new ConcurrentHashMap()); + + private static final MultipleArcTransition ATTEMPT_KILLED_TRANSITION = new AttemptKilledTransition(); private static final SingleArcTransition KILL_TRANSITION = new KillTransition(); @@ -156,7 +169,7 @@ public class TaskImpl implements Task, EventHandler { private static final TaskStateChangedCallback STATE_CHANGED_CALLBACK = new TaskStateChangedCallback(); - + private static final StateMachineFactory stateMachineFactory @@ -166,7 +179,7 @@ public class TaskImpl implements Task, EventHandler { // define the state machine of Task // Transitions from NEW state - // Stay in NEW in recovery when Task is killed in the previous AM + // Stay in NEW in recovery when Task is killed in the previous AM .addTransition(TaskStateInternal.NEW, EnumSet.of(TaskStateInternal.NEW, TaskStateInternal.SCHEDULED), TaskEventType.T_SCHEDULE, new InitialScheduleTransition()) @@ -181,13 +194,13 @@ TaskEventType.T_ATTEMPT_LAUNCHED, new LaunchTransition()) .addTransition(TaskStateInternal.SCHEDULED, TaskStateInternal.KILL_WAIT, TaskEventType.T_TERMINATE, KILL_TRANSITION) - .addTransition(TaskStateInternal.SCHEDULED, TaskStateInternal.SCHEDULED, + .addTransition(TaskStateInternal.SCHEDULED, EnumSet.of(TaskStateInternal.SCHEDULED, TaskStateInternal.FAILED), TaskEventType.T_ATTEMPT_KILLED, ATTEMPT_KILLED_TRANSITION) .addTransition(TaskStateInternal.SCHEDULED, EnumSet.of(TaskStateInternal.SCHEDULED, TaskStateInternal.FAILED), TaskEventType.T_ATTEMPT_FAILED, new AttemptFailedTransition()) - // Happens in recovery + // Happens in recovery .addTransition(TaskStateInternal.SCHEDULED, EnumSet.of(TaskStateInternal.RUNNING, TaskStateInternal.SUCCEEDED), TaskEventType.T_ATTEMPT_SUCCEEDED, @@ -200,11 +213,11 @@ TaskEventType.T_ATTEMPT_LAUNCHED, new LaunchTransition()) TaskEventType.T_ATTEMPT_LAUNCHED) //more attempts may start later .addTransition(TaskStateInternal.RUNNING, TaskStateInternal.RUNNING, TaskEventType.T_ADD_SPEC_ATTEMPT, new RedundantScheduleTransition()) - .addTransition(TaskStateInternal.RUNNING, + .addTransition(TaskStateInternal.RUNNING, EnumSet.of(TaskStateInternal.SUCCEEDED), TaskEventType.T_ATTEMPT_SUCCEEDED, new AttemptSucceededTransition()) - .addTransition(TaskStateInternal.RUNNING, TaskStateInternal.RUNNING, + .addTransition(TaskStateInternal.RUNNING, EnumSet.of(TaskStateInternal.RUNNING, TaskStateInternal.FAILED), TaskEventType.T_ATTEMPT_KILLED, ATTEMPT_KILLED_TRANSITION) .addTransition(TaskStateInternal.RUNNING, @@ -368,6 +381,7 @@ public TaskImpl(TezVertexID vertexId, int taskIndex, writeLock = readWriteLock.writeLock(); this.attempts = Collections.emptyMap(); maxFailedAttempts = vertex.getVertexConfig().getMaxFailedTaskAttempts(); + maxAttempts = vertex.getVertexConfig().getMaxTaskAttempts(); taskId = TezTaskID.getInstance(vertexId, taskIndex); this.taskCommunicatorManagerInterface = taskCommunicatorManagerInterface; this.taskHeartbeatHandler = thh; @@ -384,7 +398,7 @@ public TaskImpl(TezVertexID vertexId, int taskIndex, stateMachineFactory.make(this), this); augmentStateMachine(); } - + @Override public Map getAttempts() { readLock.lock(); @@ -403,7 +417,7 @@ public Map getAttempts() { readLock.unlock(); } } - + @Override public TaskAttempt getAttempt(TezTaskAttemptID attemptID) { readLock.lock(); @@ -420,7 +434,7 @@ public Vertex getVertex() { } @Override - public TezTaskID getTaskId() { + public TezTaskID getTaskID() { return taskId; } @@ -451,20 +465,25 @@ public TaskReport getReport() { 
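
Context for the state-machine edits above: T_ATTEMPT_KILLED moves from a SingleArcTransition to a MultipleArcTransition because, with the new overall maxAttempts cap, handling a killed attempt can either reschedule (stay in SCHEDULED or RUNNING) or fail the task outright. A toy illustration of that runtime choice (hypothetical names, not the Hadoop state-machine API):

    enum SketchTaskState { SCHEDULED, RUNNING, FAILED }

    final class AttemptKilledArc {
      // Mirrors the guard added in addAndScheduleAttempt(): once the overall
      // attempt cap is reached, no replacement is scheduled and the task fails.
      static SketchTaskState onAttemptKilled(SketchTaskState current,
          int attemptsSoFar, int maxAttempts) {
        if (maxAttempts > 0 && attemptsSoFar >= maxAttempts) {
          return SketchTaskState.FAILED;
        }
        return current; // schedule a new attempt and stay in the current state
      }
    }
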
@Override public TezCounters getCounters() { - TezCounters counters = new TezCounters(); - counters.incrAllCounters(this.counters); + TezCounters tezCounters = null; + if (getVertex().isSpeculationEnabled()) { + tezCounters = new TezCounters(); + tezCounters.incrAllCounters(this.counters); + } readLock.lock(); try { TaskAttempt bestAttempt = selectBestAttempt(); - if (bestAttempt != null) { - counters.incrAllCounters(bestAttempt.getCounters()); + TezCounters taskCounters = (bestAttempt != null) ? bestAttempt.getCounters() : TaskAttemptImpl.EMPTY_COUNTERS; + if (getVertex().isSpeculationEnabled()) { + tezCounters.incrAllCounters(taskCounters); + return tezCounters; } - return counters; + return taskCounters; } finally { readLock.unlock(); } } - + TaskStatistics getStatistics() { // simply return the stats from the best attempt readLock.lock(); @@ -509,7 +528,7 @@ public ArrayList getTaskAttemptTezEvents(TezTaskAttemptID attemptID, try { if (!attempts.containsKey(attemptID)) { throw new TezUncheckedException("Unknown TA: " + attemptID - + " asking for events from task:" + getTaskId()); + + " asking for events from task:" + getTaskID()); } if (tezEventsForTaskAttempts.size() > fromEventId) { @@ -535,7 +554,7 @@ public ArrayList getTaskAttemptTezEvents(TezTaskAttemptID attemptID, readLock.unlock(); } } - + @Override public TaskSpec getBaseTaskSpec() { readLock.lock(); @@ -545,7 +564,7 @@ public TaskSpec getBaseTaskSpec() { readLock.unlock(); } } - + @Override public TaskLocationHint getTaskLocationHint() { readLock.lock(); @@ -576,6 +595,11 @@ public long getFinishTime() { } } + @Override + public Set getNodesWithRunningAttempts() { + return nodesWithRunningAttempts; + } + @VisibleForTesting public TaskStateInternal getInternalState() { readLock.lock(); @@ -624,7 +648,7 @@ private long getLastTaskAttemptFinishTime() { long finishTime = 0; for (TaskAttempt at : attempts.values()) { //select the max finish time of all attempts - // FIXME shouldnt this not count attempts killed after an attempt succeeds + // FIXME shouldn't this not count attempts killed after an attempt succeeds if (finishTime < at.getFinishTime()) { finishTime = at.getFinishTime(); } @@ -671,12 +695,10 @@ private TaskAttempt selectBestAttempt() { public boolean canCommit(TezTaskAttemptID taskAttemptID) { writeLock.lock(); try { - if (LOG.isDebugEnabled()) { - LOG.debug("Commit go/no-go request from " + taskAttemptID); - } + LOG.debug("Commit go/no-go request from {}", taskAttemptID); TaskState state = getState(); if (state == TaskState.SCHEDULED) { - // the actual running task ran and is done and asking for commit. we are still stuck + // the actual running task ran and is done and asking for commit. we are still stuck // in the scheduled state which indicates a backlog in event processing. lets wait for the // backlog to clear. returning false will make the attempt come back to us. LOG.info( @@ -684,7 +706,7 @@ public boolean canCommit(TezTaskAttemptID taskAttemptID) { + "Attempt committing before state machine transitioned to running : Task {}", taskId); return false; } - // at this point the attempt is no longer in scheduled state or else we would still + // at this point the attempt is no longer in scheduled state or else we would still // have been in scheduled state in task impl. if (state != TaskState.RUNNING) { LOG.info("Task not running. 
Issuing kill to bad commit attempt " + taskAttemptID); @@ -711,9 +733,7 @@ public boolean canCommit(TezTaskAttemptID taskAttemptID) { } } else { if (commitAttempt.equals(taskAttemptID)) { - if (LOG.isDebugEnabled()) { - LOG.debug(taskAttemptID + " already given a go for committing the task output."); - } + LOG.debug("{} already given a go for committing the task output.", taskAttemptID); return true; } // Don't think this can be a pluggable decision, so simply raise an @@ -721,9 +741,7 @@ public boolean canCommit(TezTaskAttemptID taskAttemptID) { // Wait for commit attempt to succeed. Dont kill this. If commit // attempt fails then choose a different committer. When commit attempt // succeeds then this and others will be killed - if (LOG.isDebugEnabled()) { - LOG.debug(commitAttempt + " is current committer. Commit waiting for: " + taskAttemptID); - } + LOG.debug("{} is current committer. Commit waiting for: {}", commitAttempt, taskAttemptID); return false; } @@ -741,7 +759,7 @@ TaskAttemptImpl createAttempt(int attemptNumber, TezTaskAttemptID schedulingCaus baseTaskSpec.getTaskConf()); return new TaskAttemptImpl(attemptId, eventHandler, taskCommunicatorManagerInterface, conf, clock, taskHeartbeatHandler, appContext, - (failedAttempts > 0), taskResource, containerContext, leafVertex, getVertex(), + (failedAttempts > 0), taskResource, containerContext, leafVertex, this, locationHint, taskSpec, schedulingCausalTA); } @@ -759,14 +777,14 @@ public TaskAttempt getSuccessfulAttempt() { } // This is always called in the Write Lock - private void addAndScheduleAttempt(TezTaskAttemptID schedulingCausalTA) { + private boolean addAndScheduleAttempt(TezTaskAttemptID schedulingCausalTA) { TaskAttempt attempt = createAttempt(attempts.size(), schedulingCausalTA); if (LOG.isDebugEnabled()) { - LOG.debug("Created attempt " + attempt.getID()); + LOG.debug("Created attempt " + attempt.getTaskAttemptID()); } switch (attempts.size()) { case 0: - attempts = Collections.singletonMap(attempt.getID(), attempt); + attempts = Collections.singletonMap(attempt.getTaskAttemptID(), attempt); break; case 1: @@ -774,16 +792,26 @@ private void addAndScheduleAttempt(TezTaskAttemptID schedulingCausalTA) { = new LinkedHashMap(maxFailedAttempts); newAttempts.putAll(attempts); attempts = newAttempts; - Preconditions.checkArgument(attempts.put(attempt.getID(), attempt) == null, - attempt.getID() + " already existed"); + Preconditions.checkArgument(attempts.put(attempt.getTaskAttemptID(), attempt) == null, + attempt.getTaskAttemptID() + " already existed"); break; - default: - Preconditions.checkArgument(attempts.put(attempt.getID(), attempt) == null, - attempt.getID() + " already existed"); + Preconditions.checkArgument(attempts.put(attempt.getTaskAttemptID(), attempt) == null, + attempt.getTaskAttemptID() + " already existed"); break; } + if (maxAttempts > 0 && attempts.size() == maxAttempts) { + TaskImpl task = (TaskImpl) attempt.getTask(); + LOG.error("Cannot schedule new attempt for task as max number of attempts ({}) reached: {}", + maxAttempts, task); + + task.logJobHistoryTaskFailedEvent(TaskState.FAILED); + task.eventHandler.handle(new VertexEventTaskCompleted(task.taskId, TaskState.FAILED)); + + return false; + } + // TODO: Recovery /* // Update nextATtemptNumber @@ -796,13 +824,13 @@ private void addAndScheduleAttempt(TezTaskAttemptID schedulingCausalTA) { } */ - this.taskAttemptStatus.put(attempt.getID().getId(), false); + this.taskAttemptStatus.put(attempt.getTaskAttemptID().getId(), false); //schedule the 
nextAttemptNumber // send event to DAG to assign priority and schedule the attempt with global // picture in mind eventHandler.handle(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, attempt)); - + return true; } @Override @@ -842,17 +870,17 @@ protected void internalError(TaskEventType type) { LOG.error("Invalid event " + type + " on Task " + this.taskId + " in state:" + getInternalState()); eventHandler.handle(new DAGEventDiagnosticsUpdate( - this.taskId.getVertexID().getDAGId(), "Invalid event " + type + + getDAGID(), "Invalid event " + type + " on Task " + this.taskId)); - eventHandler.handle(new DAGEvent(this.taskId.getVertexID().getDAGId(), + eventHandler.handle(new DAGEvent(getDAGID(), DAGEventType.INTERNAL_ERROR)); } protected void internalErrorUncaughtException(TaskEventType type, Exception e) { eventHandler.handle(new DAGEventDiagnosticsUpdate( - this.taskId.getVertexID().getDAGId(), "Uncaught exception when handling event " + type + + getDAGID(), "Uncaught exception when handling event " + type + " on Task " + this.taskId + ", error=" + e.getMessage())); - eventHandler.handle(new DAGEvent(this.taskId.getVertexID().getDAGId(), + eventHandler.handle(new DAGEvent(getDAGID(), DAGEventType.INTERNAL_ERROR)); } @@ -874,12 +902,12 @@ private void sendDAGSchedulerFinishedEvent(TezTaskAttemptID taId) { eventHandler.handle(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_COMPLETED, attempts.get(taId))); } - + private static void unSucceed(TaskImpl task) { task.commitAttempt = null; task.successfulAttempt = null; } - + /** * @return a String representation of the splits. * @@ -895,7 +923,7 @@ protected void logJobHistoryTaskStartedEvent() { TaskStartedEvent startEvt = new TaskStartedEvent(taskId, getVertex().getName(), scheduledTime, getLaunchTime()); this.appContext.getHistoryHandler().handle( - new DAGHistoryEvent(taskId.getVertexID().getDAGId(), startEvt)); + new DAGHistoryEvent(getDAGID(), startEvt)); } protected void logJobHistoryTaskFinishedEvent() { @@ -907,18 +935,18 @@ protected void logJobHistoryTaskFinishedEvent() { successfulAttempt, TaskState.SUCCEEDED, "", getCounters(), failedAttempts); this.appContext.getHistoryHandler().handle( - new DAGHistoryEvent(taskId.getVertexID().getDAGId(), finishEvt)); + new DAGHistoryEvent(getDAGID(), finishEvt)); } protected void logJobHistoryTaskFailedEvent(TaskState finalState) { this.finishTime = clock.getTime(); TaskFinishedEvent finishEvt = new TaskFinishedEvent(taskId, getVertex().getName(), getLaunchTime(), this.finishTime, null, - finalState, + finalState, StringUtils.join(getDiagnostics(), LINE_SEPARATOR), getCounters(), failedAttempts); this.appContext.getHistoryHandler().handle( - new DAGHistoryEvent(taskId.getVertexID().getDAGId(), finishEvt)); + new DAGHistoryEvent(getDAGID(), finishEvt)); } private void addDiagnosticInfo(String diag) { @@ -926,7 +954,7 @@ private void addDiagnosticInfo(String diag) { diagnostics.add(diag); } } - + @VisibleForTesting int getUncompletedAttemptsCount() { try { @@ -965,13 +993,13 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { if (task.recoveryData != null) { TaskStartedEvent tStartedEvent = task.recoveryData.getTaskStartedEvent(); TaskFinishedEvent tFinishedEvent = task.recoveryData.getTaskFinishedEvent(); - // If TaskStartedEvent is not seen but TaskFinishedEvent is seen, that means + // If TaskStartedEvent is not seen but TaskFinishedEvent is seen, that means // Task is killed before it is started. 
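addAndScheduleAttempt() now reports, via its boolean return, whether a new attempt could actually be created, and every caller in this patch turns a false into TaskStateInternal.FAILED. A trimmed sketch of that cap check with hypothetical stand-in types (the real method also builds the TaskAttemptImpl, tracks attempt status, and emits the scheduler event):

import java.util.HashMap;
import java.util.Map;

class AttemptScheduler {
  private final Map<Integer, Object> attempts = new HashMap<>();

  // returns false once the hard cap is reached; the caller then fails the task
  boolean tryScheduleAttempt(int maxAttempts) {
    Object attempt = new Object();          // stands in for createAttempt(...)
    attempts.put(attempts.size(), attempt); // attempt ids are assigned densely
    if (maxAttempts > 0 && attempts.size() == maxAttempts) {
      // the patch logs, writes TaskFinishedEvent(FAILED) to history and
      // notifies the vertex before refusing further attempts
      return false;
    }
    return true; // the real code now hands the attempt to the DAG scheduler
  }
}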
Just send T_TERMINATE to itself to move to KILLED if (tStartedEvent == null && tFinishedEvent != null) { Preconditions.checkArgument(tFinishedEvent.getState() == TaskState.KILLED, "TaskStartedEvent is not seen, but TaskFinishedEvent is seen and with invalid state=" - + tFinishedEvent.getState() + ", taskId=" + task.getTaskId()); + + tFinishedEvent.getState() + ", taskId=" + task.getTaskID()); // TODO (TEZ-2938) // use tFinishedEvent.getTerminationCause after adding TaskTerminationCause to TaskFinishedEvent task.eventHandler.handle(new TaskEventTermination(task.taskId, @@ -990,7 +1018,9 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { task.locationHint = scheduleEvent.getTaskLocationHint(); task.baseTaskSpec = scheduleEvent.getBaseTaskSpec(); // For now, initial scheduling dependency is due to vertex manager scheduling - task.addAndScheduleAttempt(null); + if (!task.addAndScheduleAttempt(null)) { + return TaskStateInternal.FAILED; + } return TaskStateInternal.SCHEDULED; } } @@ -1006,15 +1036,34 @@ private static class RedundantScheduleTransition @Override public void transition(TaskImpl task, TaskEvent event) { LOG.info("Scheduling a redundant attempt for task " + task.taskId); - task.counters.findCounter(TaskCounter.NUM_SPECULATIONS).increment(1); - TezTaskAttemptID earliestUnfinishedAttempt = null; + TaskAttempt earliestUnfinishedAttempt = null; for (TaskAttempt ta : task.attempts.values()) { // find the oldest running attempt if (!ta.isFinished()) { - earliestUnfinishedAttempt = ta.getID(); + earliestUnfinishedAttempt = ta; + if (ta.getNodeId() != null) { + task.nodesWithRunningAttempts.add(ta.getNodeId()); + } + } else { + if (TaskAttemptState.SUCCEEDED.equals(ta.getState())) { + LOG.info("Ignore speculation scheduling for task {} since it has succeeded with attempt {}.", + task.getTaskID(), ta.getTaskAttemptID()); + return; + } } } - task.addAndScheduleAttempt(earliestUnfinishedAttempt); + if (earliestUnfinishedAttempt == null) { + // no running (or SUCCEEDED) task attempt at this moment, no need to schedule speculative attempt either + LOG.info("Ignore speculation scheduling since there is no running attempt on task {}.", task.getTaskID()); + return; + } + if (task.commitAttempt != null) { + LOG.info("Ignore speculation scheduling for task {} since commit has started with commitAttempt {}.", + task.getTaskID(), task.commitAttempt); + return; + } + task.counters.findCounter(TaskCounter.NUM_SPECULATIONS).increment(1); + task.addAndScheduleAttempt(earliestUnfinishedAttempt.getTaskAttemptID()); } } @@ -1031,26 +1080,26 @@ private String recoverSuccessTaskAttempt(TaskImpl task) { for (Entry entry : task.getVertex().getOutputCommitters().entrySet()) { LOG.info("Recovering data for task from previous DAG attempt" - + ", taskId=" + task.getTaskId() + + ", taskId=" + task.getTaskID() + ", output=" + entry.getKey()); OutputCommitter committer = entry.getValue(); if (!committer.isTaskRecoverySupported()) { errorMsg = "Task recovery not supported by committer" + ", failing task attempt"; LOG.info(errorMsg - + ", taskId=" + task.getTaskId() + + ", taskId=" + task.getTaskID() + ", attemptId=" + task.successfulAttempt + ", output=" + entry.getKey()); break; } try { - committer.recoverTask(task.getTaskId().getId(), + committer.recoverTask(task.getTaskID().getId(), task.appContext.getApplicationAttemptId().getAttemptId()-1); } catch (Exception e) { errorMsg = "Task recovery failed by committer: " + ExceptionUtils.getStackTrace(e); LOG.warn("Task recovery failed by committer" - 
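The reworked RedundantScheduleTransition above adds three cheap guards before paying for a speculative attempt. Condensed into a standalone predicate, with a hypothetical Attempt interface exposing just what the checks need:

import java.util.List;

class SpeculationGuard {
  interface Attempt {
    boolean isFinished();
    boolean isSucceeded();
  }

  // returns the attempt to use as the scheduling cause, or null to skip speculation
  static Attempt pickSpeculationCause(List<Attempt> attempts, Object commitAttempt) {
    Attempt earliestUnfinished = null;
    for (Attempt a : attempts) {
      if (!a.isFinished()) {
        earliestUnfinished = a;        // a running attempt to race against
      } else if (a.isSucceeded()) {
        return null;                   // guard 1: the task already succeeded
      }
    }
    if (earliestUnfinished == null) {
      return null;                     // guard 2: nothing running to speculate on
    }
    if (commitAttempt != null) {
      return null;                     // guard 3: commit is already under way
    }
    return earliestUnfinished;
  }
}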
+ ", taskId=" + task.getTaskId() + + ", taskId=" + task.getTaskID() + ", attemptId=" + task.successfulAttempt + ", output=" + entry.getKey(), e); break; @@ -1070,9 +1119,11 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { String errorMsg = recoverSuccessTaskAttempt(task); if (errorMsg != null) { LOG.info("Can not recover the successful task attempt, schedule new task attempt," - + "taskId=" + task.getTaskId()); + + "taskId=" + task.getTaskID()); task.successfulAttempt = null; - task.addAndScheduleAttempt(successTaId); + if (!task.addAndScheduleAttempt(successTaId)) { + task.finished(TaskStateInternal.FAILED); + } task.eventHandler.handle(new TaskAttemptEventAttemptKilled(successTaId, errorMsg, TaskAttemptTerminationCause.TERMINATED_AT_RECOVERY, true)); return TaskStateInternal.RUNNING; @@ -1104,12 +1155,12 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { // issue kill to all other attempts for (TaskAttempt attempt : task.attempts.values()) { - if (!attempt.getID().equals(task.successfulAttempt) && + if (!attempt.getTaskAttemptID().equals(task.successfulAttempt) && // This is okay because it can only talk us out of sending a // TA_KILL message to an attempt that doesn't need one for // other reasons. !attempt.isFinished()) { - LOG.info("Issuing kill to other attempt " + attempt.getID() + " as attempt: " + + LOG.info("Issuing kill to other attempt " + attempt.getTaskAttemptID() + " as attempt: " + task.successfulAttempt + " has succeeded"); String diagnostics = null; TaskAttemptTerminationCause errCause = null; @@ -1123,7 +1174,7 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { errCause = TaskAttemptTerminationCause.TERMINATED_INEFFECTIVE_SPECULATION; } task.eventHandler.handle(new TaskAttemptEventKillRequest(attempt - .getID(), diagnostics, errCause)); + .getTaskAttemptID(), diagnostics, errCause)); } } return task.finished(TaskStateInternal.SUCCEEDED); @@ -1131,9 +1182,11 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { } private static class AttemptKilledTransition implements - SingleArcTransition { + MultipleArcTransition { @Override - public void transition(TaskImpl task, TaskEvent event) { + public TaskStateInternal transition(TaskImpl task, TaskEvent event) { + TaskStateInternal originalState = task.getInternalState(); + TaskEventTAUpdate castEvent = (TaskEventTAUpdate) event; task.addDiagnosticInfo("TaskAttempt " + castEvent.getTaskAttemptID().getId() + " killed"); if (task.commitAttempt !=null && @@ -1145,10 +1198,27 @@ public void transition(TaskImpl task, TaskEvent event) { TaskAttemptStateInternal.KILLED); // we KillWaitAttemptCompletedTransitionready have a spare task.taskAttemptStatus.put(castEvent.getTaskAttemptID().getId(), true); - task.getVertex().incrementKilledTaskAttemptCount(); + + boolean isRejection = false; + if (event instanceof TaskEventTAKilled) { + TaskEventTAKilled killEvent = (TaskEventTAKilled) event; + if (killEvent.getCausalEvent() instanceof TaskAttemptEventTerminationCauseEvent) { + TaskAttemptEventTerminationCauseEvent cause = + (TaskAttemptEventTerminationCauseEvent)killEvent.getCausalEvent(); + isRejection = cause.getTerminationCause() == TaskAttemptTerminationCause.SERVICE_BUSY; + } + } + if (isRejection) { // TODO: remove as part of TEZ-3881. 
+ task.getVertex().incrementRejectedTaskAttemptCount(); + } else { + task.getVertex().incrementKilledTaskAttemptCount(); + } if (task.shouldScheduleNewAttempt()) { - task.addAndScheduleAttempt(castEvent.getTaskAttemptID()); + if (!task.addAndScheduleAttempt(castEvent.getTaskAttemptID())) { + return task.finished(TaskStateInternal.FAILED); + } } + return originalState; } } @@ -1173,7 +1243,7 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { return task.getInternalState(); } } - + private boolean shouldScheduleNewAttempt() { return (getUncompletedAttemptsCount() == 0 && successfulAttempt == null); @@ -1183,7 +1253,7 @@ private static class AttemptFailedTransition implements MultipleArcTransition { private TezTaskAttemptID schedulingCausalTA; - + @Override public TaskStateInternal transition(TaskImpl task, TaskEvent event) { task.failedAttempts++; @@ -1198,6 +1268,13 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { } // The attempt would have informed the scheduler about it's failure + // Delete the intermediate shuffle data for failed task attempt + TaskAttempt taskAttempt = task.getAttempt(castEvent.getTaskAttemptID()); + if (taskAttempt.getAssignedContainer() != null) { + NodeId nodeId = taskAttempt.getAssignedContainer().getNodeId(); + task.appContext.getAppMaster().taskAttemptFailed(taskAttempt.getTaskAttemptID(), nodeId); + } + task.taskAttemptStatus.put(castEvent.getTaskAttemptID().getId(), true); if (task.failedAttempts < task.maxFailedAttempts && castEvent.getTaskFailureType() == TaskFailureType.NON_FATAL) { @@ -1206,20 +1283,22 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { TaskAttemptStateInternal.FAILED); // we don't need a new event if we already have a spare if (task.shouldScheduleNewAttempt()) { - LOG.info("Scheduling new attempt for task: " + task.getTaskId() + LOG.info("Scheduling new attempt for task: " + task.getTaskID() + ", currentFailedAttempts: " + task.failedAttempts + ", maxFailedAttempts: " - + task.maxFailedAttempts); - task.addAndScheduleAttempt(getSchedulingCausalTA()); + + task.maxFailedAttempts + ", maxAttempts: " + task.maxAttempts); + if (!task.addAndScheduleAttempt(getSchedulingCausalTA())){ + return task.finished(TaskStateInternal.FAILED); + } } } else { if (castEvent.getTaskFailureType() == TaskFailureType.NON_FATAL) { LOG.info( "Failing task: {} due to too many failed attempts. currentFailedAttempts={}, maxFailedAttempts={}", - task.getTaskId(), task.failedAttempts, task.maxFailedAttempts); + task.getTaskID(), task.failedAttempts, task.maxFailedAttempts); } else { LOG.info( "Failing task: {} due to {} error reported by TaskAttempt. 
CurrentFailedAttempts={}", - task.getTaskId(), TaskFailureType.FATAL, task.failedAttempts); + task.getTaskID(), TaskFailureType.FATAL, task.failedAttempts); } task.handleTaskAttemptCompletion( ((TaskEventTAUpdate) event).getTaskAttemptID(), @@ -1235,7 +1314,7 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { protected TaskStateInternal getDefaultState(TaskImpl task) { return task.getInternalState(); } - + protected TezTaskAttemptID getSchedulingCausalTA() { return schedulingCausalTA; } @@ -1248,12 +1327,6 @@ private static class TaskRetroactiveFailureTransition @Override public TaskStateInternal transition(TaskImpl task, TaskEvent event) { - if (task.leafVertex) { - LOG.error("Unexpected event for task of leaf vertex " + event.getType() + ", taskId: " - + task.getTaskId()); - task.internalError(event.getType()); - } - TaskEventTAFailed castEvent = (TaskEventTAFailed) event; TezTaskAttemptID failedAttemptId = castEvent.getTaskAttemptID(); TaskAttempt failedAttempt = task.getAttempt(failedAttemptId); @@ -1269,7 +1342,7 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { containerId, failedAttemptId, true)); } } - + if (task.getInternalState() == TaskStateInternal.SUCCEEDED && !failedAttemptId.equals(task.successfulAttempt)) { // don't allow a different task attempt to override a previous @@ -1277,9 +1350,14 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { task.taskAttemptStatus.put(failedAttemptId.getId(), true); return TaskStateInternal.SUCCEEDED; } - + + if (task.leafVertex) { + LOG.error("Unexpected event for task of leaf vertex " + event.getType() + ", taskId: " + + task.getTaskID()); + task.internalError(event.getType()); + } Preconditions.checkState(castEvent.getCausalEvent() != null); - TaskAttemptEventOutputFailed destinationEvent = + TaskAttemptEventOutputFailed destinationEvent = (TaskAttemptEventOutputFailed) castEvent.getCausalEvent(); schedulingCausalTA = destinationEvent.getInputFailedEvent().getSourceInfo().getTaskAttemptID(); @@ -1298,7 +1376,7 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) { return returnState; } - + @Override protected TezTaskAttemptID getSchedulingCausalTA() { return schedulingCausalTA; @@ -1345,7 +1423,7 @@ public void transition(TaskImpl task, TaskEvent event) { task.addDiagnosticInfo(terminateEvent.getDiagnosticInfo()); if (terminateEvent.isFromRecovery()) { if (LOG.isDebugEnabled()) { - LOG.debug("Recovered to KILLED, taskId=" + task.getTaskId()); + LOG.debug("Recovered to KILLED, taskId=" + task.getTaskID()); } } else { task.logJobHistoryTaskFailedEvent(TaskState.KILLED); @@ -1378,22 +1456,22 @@ public void onStateChanged(TaskImpl task, TaskStateInternal taskStateInternal) { // This is a horrible hack to get around recovery issues. Without this, recovery would fail // for successful vertices. 
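The new block in AttemptFailedTransition reports the failed attempt's node to the AM (taskAttemptFailed) so the intermediate shuffle data it produced can be deleted. A sketch of the bookkeeping such targeted cleanup needs, with hypothetical names; the real path goes through appContext.getAppMaster():

import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

class ShuffleOutputTracker {
  // which node holds each attempt's intermediate output
  private final Map<String, String> attemptToNode = new HashMap<>();

  void attemptAssigned(String attemptId, String nodeId) {
    attemptToNode.put(attemptId, nodeId);
  }

  // on failure, the returned node's shuffle handler is asked to drop the data
  Optional<String> onAttemptFailed(String attemptId) {
    return Optional.ofNullable(attemptToNode.remove(attemptId));
  }
}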
// With this, recovery will end up failing for DAGs making use of InputInitializerEvents - int succesfulAttemptInt = -1; + int successfulAttemptInt = -1; if (successfulAttempt != null) { - succesfulAttemptInt = successfulAttempt.getID().getId(); + successfulAttemptInt = successfulAttempt.getTaskAttemptID().getId(); } - task.stateChangeNotifier.taskSucceeded(task.getVertex().getName(), task.getTaskId(), - succesfulAttemptInt); + task.stateChangeNotifier.taskSucceeded(task.getVertex().getName(), task.getTaskID(), + successfulAttemptInt); } } private void killUnfinishedAttempt(TaskAttempt attempt, String logMsg, TaskAttemptTerminationCause errorCause) { - if (commitAttempt != null && commitAttempt.equals(attempt.getID())) { + if (commitAttempt != null && commitAttempt.equals(attempt.getTaskAttemptID())) { LOG.info("Unsetting commit attempt: " + commitAttempt + " since attempt is being killed"); commitAttempt = null; } if (attempt != null && !attempt.isFinished()) { - eventHandler.handle(new TaskAttemptEventKillRequest(attempt.getID(), logMsg, errorCause)); + eventHandler.handle(new TaskAttemptEventKillRequest(attempt.getTaskAttemptID(), logMsg, errorCause)); } } @@ -1412,7 +1490,7 @@ public long getFirstAttemptStartTime() { readLock.lock(); try { // The first attempt will always have an index of 0. - return getAttempt(TezTaskAttemptID.getInstance(getTaskId(), 0)).getScheduleTime(); + return getAttempt(TezTaskAttemptID.getInstance(getTaskID(), 0)).getScheduleTime(); } finally { readLock.unlock(); } @@ -1449,10 +1527,9 @@ public void transition(TaskImpl task, TaskEvent event) { void setCounters(TezCounters counters) { try { writeLock.lock(); - this.counters = counters; + selectBestAttempt().setCounters(counters); } finally { writeLock.unlock(); } } - } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezContainer.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezContainer.java new file mode 100644 index 0000000000..ae58f80b72 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezContainer.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.app.dag.impl; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ExecutionType; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.Token; +import org.apache.hadoop.yarn.util.RackResolver; +import org.apache.tez.util.StringInterner; + +/** + * Convenience wrapper around {@link org.apache.hadoop.yarn.api.records.Container} + */ +public class TezContainer extends Container { + + public final static TezContainer NULL_TEZ_CONTAINER = new TezContainer(null); + private final Container container; + + public TezContainer(Container container) { + this.container = container; + } + + @Override + public ContainerId getId() { + return container != null ? container.getId() : null; + } + + @Override + public void setId(ContainerId id) { + container.setId(id); + } + + @Override + public NodeId getNodeId() { + return container != null ? container.getNodeId() : null; + } + + @Override + public void setNodeId(NodeId nodeId) { + container.setNodeId(nodeId); + } + + @Override + public String getNodeHttpAddress() { + return container != null ? StringInterner.intern(container.getNodeHttpAddress()) : null; + } + + @Override + public void setNodeHttpAddress(String nodeHttpAddress) { + container.setNodeHttpAddress(nodeHttpAddress); + } + + @Override + public Map>> getExposedPorts() { + return container.getExposedPorts(); + } + + @Override + public void setExposedPorts(Map>> ports) { + container.setExposedPorts(ports); + } + + @Override + public Resource getResource() { + return container.getResource(); + } + + @Override + public void setResource(Resource resource) { + container.setResource(resource); + } + + @Override + public Priority getPriority() { + return container.getPriority(); + } + + @Override + public void setPriority(Priority priority) { + container.setPriority(priority); + } + + @Override + public Token getContainerToken() { + return container.getContainerToken(); + } + + @Override + public void setContainerToken(Token containerToken) { + container.setContainerToken(containerToken); + } + + @Override + public ExecutionType getExecutionType() { + return container.getExecutionType(); + } + + @Override + public void setExecutionType(ExecutionType executionType) { + container.setExecutionType(executionType); + } + + @Override + public int compareTo(Container other) { + if (this.getId().compareTo(other.getId()) == 0) { + if (this.getNodeId().compareTo(other.getNodeId()) == 0) { + return this.getResource().compareTo(other.getResource()); + } else { + return this.getNodeId().compareTo(other.getNodeId()); + } + } else { + return this.getId().compareTo(other.getId()); + } + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass().isAssignableFrom(this.getClass())) { + Container otherContainer = ((TezContainer) other).container; + if (this.container == null && otherContainer == null) { + return true; 
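TezContainer above combines straight delegation with a null-object constant (NULL_TEZ_CONTAINER), so call sites can hold a non-null reference before a real YARN container exists. The essential shape, reduced to a hypothetical Delegate type:

final class NullSafeWrapper {
  interface Delegate { String name(); }

  static final NullSafeWrapper NULL = new NullSafeWrapper(null);

  private final Delegate delegate;

  NullSafeWrapper(Delegate delegate) { this.delegate = delegate; }

  // the null check lives here once, not at every call site
  String name() { return delegate != null ? delegate.name() : null; }
}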
+ } else if (this.container == null) { + return false; + } + return this.container.equals((otherContainer)); + } + return false; + } + + @Override + public int hashCode() { + return container.hashCode(); + } + + public String getRackName() { + return StringInterner.intern(RackResolver.resolve(container.getNodeId().getHost()).getNetworkLocation()); + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java index 4ca4024d27..1c8c326c54 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TezRootInputInitializerContextImpl.java @@ -18,12 +18,15 @@ package org.apache.tez.dag.app.dag.impl; -import static com.google.common.base.Preconditions.checkNotNull; + import java.util.Set; +import java.util.Objects; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.InputDescriptor; import org.apache.tez.dag.api.InputInitializerDescriptor; import org.apache.tez.dag.api.RootInputLeafOutput; @@ -49,10 +52,10 @@ public TezRootInputInitializerContextImpl( RootInputLeafOutput input, Vertex vertex, AppContext appContext, RootInputInitializerManager manager) { - checkNotNull(input, "input is null"); - checkNotNull(vertex, "vertex is null"); - checkNotNull(appContext, "appContext is null"); - checkNotNull(manager, "initializerManager is null"); + Objects.requireNonNull(input, "input is null"); + Objects.requireNonNull(vertex, "vertex is null"); + Objects.requireNonNull(appContext, "appContext is null"); + Objects.requireNonNull(manager, "initializerManager is null"); this.input = input; this.vertex = vertex; this.appContext = appContext; @@ -61,7 +64,7 @@ public TezRootInputInitializerContextImpl( @Override public ApplicationId getApplicationId() { - return vertex.getVertexId().getDAGId().getApplicationId(); + return vertex.getVertexId().getDAGID().getApplicationId(); } @Override @@ -83,7 +86,12 @@ public UserPayload getInputUserPayload() { public UserPayload getUserPayload() { return this.input.getControllerDescriptor().getUserPayload(); } - + + @Override + public Configuration getVertexConfiguration() { + return vertex.getConf(); + } + @Override public int getNumTasks() { return vertex.getTotalTasks(); @@ -94,6 +102,11 @@ public Resource getVertexTaskResource() { return vertex.getTaskResource(); } + @Override + public int getVertexId() { + return vertex.getVertexId().getId(); + } + @Override public Resource getTotalAvailableResource() { return appContext.getTaskScheduler().getTotalResources(vertex.getTaskSchedulerIdentifier()); @@ -119,4 +132,8 @@ public void registerForVertexStateUpdates(String vertexName, Set st manager.registerForVertexUpdates(vertexName, input.getName(), stateSet); } + @Override + public void addCounters(final TezCounters tezCounters) { + vertex.addCounters(tezCounters); + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java index 59552f2798..62902b8f51 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexImpl.java @@ -42,12 +42,16 @@ import 
javax.annotation.Nullable; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.StringInterner; +import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.service.ServiceOperations; +import org.apache.hadoop.service.ServiceStateException; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.event.EventHandler; @@ -59,11 +63,13 @@ import org.apache.hadoop.yarn.util.Clock; import org.apache.tez.client.TezClientUtils; import org.apache.tez.common.ATSConstants; +import org.apache.tez.common.GuavaShim; +import org.apache.tez.common.ProgressHelper; import org.apache.tez.common.ReflectionUtils; import org.apache.tez.common.TezUtilsInternal; +import org.apache.tez.common.counters.AggregateTezCounters; import org.apache.tez.common.counters.LimitExceededException; import org.apache.tez.common.counters.TezCounters; -import org.apache.tez.common.io.NonSyncByteArrayInputStream; import org.apache.tez.common.io.NonSyncByteArrayOutputStream; import org.apache.tez.dag.api.DagTypeConverters; import org.apache.tez.dag.api.EdgeManagerPluginDescriptor; @@ -108,6 +114,8 @@ import org.apache.tez.dag.app.dag.RootInputInitializerManager; import org.apache.tez.dag.app.dag.StateChangeNotifier; import org.apache.tez.dag.app.dag.Task; +import org.apache.tez.dag.app.dag.TaskAttempt; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.tez.dag.app.dag.TaskAttemptStateInternal; import org.apache.tez.dag.app.dag.TaskTerminationCause; import org.apache.tez.dag.app.dag.Vertex; @@ -124,6 +132,7 @@ import org.apache.tez.dag.app.dag.event.TaskEventScheduleTask; import org.apache.tez.dag.app.dag.event.TaskEventTermination; import org.apache.tez.dag.app.dag.event.TaskEventType; +import org.apache.tez.dag.app.dag.event.VertexShuffleDataDeletion; import org.apache.tez.dag.app.dag.event.VertexEvent; import org.apache.tez.dag.app.dag.event.VertexEventCommitCompleted; import org.apache.tez.dag.app.dag.event.VertexEventInputDataInformation; @@ -167,6 +176,7 @@ import org.apache.tez.runtime.api.TaskAttemptIdentifier; import org.apache.tez.runtime.api.VertexStatistics; import org.apache.tez.runtime.api.events.CompositeDataMovementEvent; +import org.apache.tez.runtime.api.events.CustomProcessorEvent; import org.apache.tez.runtime.api.events.DataMovementEvent; import org.apache.tez.runtime.api.events.InputDataInformationEvent; import org.apache.tez.runtime.api.events.InputFailedEvent; @@ -180,13 +190,15 @@ import org.apache.tez.runtime.api.impl.TaskSpec; import org.apache.tez.runtime.api.impl.TaskStatistics; import org.apache.tez.runtime.api.impl.TezEvent; +import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; import org.apache.tez.state.OnStateChangedCallback; import org.apache.tez.state.StateMachineTez; +import org.apache.tez.util.StringInterner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.HashMultiset; import com.google.common.collect.Lists; @@ 
-223,6 +235,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl private final AppContext appContext; private final DAG dag; private final VertexRecoveryData recoveryData; + private boolean isVertexInitSkipped = false; private List initGeneratedEvents = new ArrayList(); // set it to be true when setParallelism is called(used for recovery) private boolean setParallelismCalledFlag = false; @@ -231,6 +244,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl // must be a linked map for ordering volatile LinkedHashMap tasks = new LinkedHashMap(); private Object fullCountersLock = new Object(); + private TezCounters counters = new TezCounters(); private TezCounters fullCounters = null; private TezCounters cachedCounters = null; private long cachedCountersTimestamp = 0; @@ -254,6 +268,13 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl final ServicePluginInfo servicePluginInfo; + /* + * For every upstream host (map key), contains every unique downstream hostname that reported INPUT_READ_ERROR. + * This map helps to decide if there is a problem with the host that produced the map outputs. There is an assumption + * that if multiple downstream hosts report input errors for the same upstream host, then it's likely that the output + * is to blame and needs to be rerun. + */ + private final Map> downstreamBlamingHosts = Maps.newHashMap(); private final float maxFailuresPercent; private boolean logSuccessDiagnostics = false; @@ -302,8 +323,10 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl @VisibleForTesting final List pendingVmEvents = new LinkedList<>(); - - LegacySpeculator speculator; + + private final AtomicBoolean servicesInited; + private LegacySpeculator speculator; + private List services; @VisibleForTesting Map> commitFutures = new ConcurrentHashMap>(); @@ -512,6 +535,11 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl EnumSet.of(VertexState.TERMINATING, VertexState.KILLED, VertexState.FAILED, VertexState.ERROR), VertexEventType.V_TASK_COMPLETED, new TaskCompletedTransition()) + .addTransition + (VertexState.TERMINATING, + EnumSet.of(VertexState.TERMINATING, VertexState.KILLED, VertexState.FAILED, VertexState.ERROR), + VertexEventType.V_COMPLETED, + new VertexNoTasksCompletedTransition()) .addTransition( VertexState.TERMINATING, VertexState.ERROR, VertexEventType.V_INTERNAL_ERROR, @@ -533,7 +561,7 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl VertexEventType.V_SOURCE_TASK_ATTEMPT_COMPLETED, VertexEventType.V_TASK_ATTEMPT_COMPLETED, VertexEventType.V_TASK_RESCHEDULED, - VertexEventType.V_COMPLETED)) + VertexEventType.V_DELETE_SHUFFLE_DATA)) // Transitions from SUCCEEDED state .addTransition( @@ -569,6 +597,9 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl .addTransition(VertexState.SUCCEEDED, VertexState.SUCCEEDED, VertexEventType.V_TASK_ATTEMPT_COMPLETED, new TaskAttemptCompletedEventTransition()) + .addTransition(VertexState.SUCCEEDED, VertexState.SUCCEEDED, + VertexEventType.V_DELETE_SHUFFLE_DATA, + new VertexShuffleDeleteTransition()) // Transitions from FAILED state @@ -590,7 +621,8 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl VertexEventType.V_ROOT_INPUT_INITIALIZED, VertexEventType.V_SOURCE_TASK_ATTEMPT_COMPLETED, VertexEventType.V_NULL_EDGE_INITIALIZED, - VertexEventType.V_INPUT_DATA_INFORMATION)) + 
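The downstreamBlamingHosts map declared above supports a "multiple independent witnesses" heuristic: an upstream host is only suspected once several distinct downstream hosts report INPUT_READ_ERROR against it. A sketch of how such a map is typically consulted; the quorum parameter is made up here, this hunk only introduces the structure:

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

class BlameTracker {
  private final Map<String, Set<String>> downstreamBlamingHosts = new HashMap<>();

  // record one INPUT_READ_ERROR report; true once enough distinct
  // downstream hosts blame the same upstream host to justify a rerun
  boolean reportAndCheck(String upstreamHost, String downstreamHost, int quorum) {
    Set<String> witnesses =
        downstreamBlamingHosts.computeIfAbsent(upstreamHost, k -> new HashSet<>());
    witnesses.add(downstreamHost);
    return witnesses.size() >= quorum;
  }
}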
VertexEventType.V_INPUT_DATA_INFORMATION, + VertexEventType.V_DELETE_SHUFFLE_DATA)) // Transitions from KILLED state .addTransition( @@ -612,7 +644,8 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl VertexEventType.V_TASK_COMPLETED, VertexEventType.V_ROOT_INPUT_INITIALIZED, VertexEventType.V_NULL_EDGE_INITIALIZED, - VertexEventType.V_INPUT_DATA_INFORMATION)) + VertexEventType.V_INPUT_DATA_INFORMATION, + VertexEventType.V_DELETE_SHUFFLE_DATA)) // No transitions from INTERNAL_ERROR state. Ignore all. .addTransition( @@ -632,7 +665,8 @@ public class VertexImpl implements org.apache.tez.dag.app.dag.Vertex, EventHandl VertexEventType.V_INTERNAL_ERROR, VertexEventType.V_ROOT_INPUT_INITIALIZED, VertexEventType.V_NULL_EDGE_INITIALIZED, - VertexEventType.V_INPUT_DATA_INFORMATION)) + VertexEventType.V_INPUT_DATA_INFORMATION, + VertexEventType.V_DELETE_SHUFFLE_DATA)) // create the topology tables .installTopology(); @@ -647,7 +681,7 @@ private void augmentStateMachine() { .registerStateEnteredCallback(VertexState.RUNNING, STATE_CHANGED_CALLBACK) .registerStateEnteredCallback(VertexState.INITIALIZING, - STATE_CHANGED_CALLBACK);; + STATE_CHANGED_CALLBACK); } private final StateMachineTez stateMachine; @@ -674,6 +708,7 @@ private void augmentStateMachine() { AtomicInteger failedTaskAttemptCount = new AtomicInteger(0); @VisibleForTesting AtomicInteger killedTaskAttemptCount = new AtomicInteger(0); + AtomicInteger rejectedTaskAttemptCount = new AtomicInteger(0); @VisibleForTesting long initTimeRequested; // Time at which INIT request was received. @@ -705,6 +740,9 @@ private void augmentStateMachine() { @VisibleForTesting Map sourceVertices; private Map targetVertices; + private boolean cleanupShuffleDataAtVertexLevel; + @VisibleForTesting + VertexShuffleDataDeletionContext vShuffleDeletionContext; Set uninitializedEdges = Sets.newHashSet(); // using a linked hash map to conveniently map edge names to a contiguous index LinkedHashMap ioIndices = Maps.newLinkedHashMap(); @@ -860,6 +898,85 @@ void resetCompletedTaskStatsCache(boolean recompute) { } } + @Override + public void initServices() { + if (servicesInited.get()) { + LOG.debug("Skipping Initing services for vertex because already" + + " Initialized, name={}", this.vertexName); + return; + } + writeLock.lock(); + try { + List servicesToAdd = new ArrayList<>(); + if (isSpeculationEnabled()) { + // Initialize the speculator + LOG.debug("Initing service vertex speculator, name={}", this.vertexName); + speculator = new LegacySpeculator(vertexConf, getAppContext(), this); + speculator.init(vertexConf); + servicesToAdd.add(speculator); + } + services = Collections.synchronizedList(servicesToAdd); + servicesInited.set(true); + } finally { + writeLock.unlock(); + } + LOG.debug("Initing service vertex, name={}", this.vertexName); + } + + @Override + public void startServices() { + writeLock.lock(); + try { + if (!servicesInited.get()) { + initServices(); + } + for (AbstractService srvc : services) { + if (LOG.isDebugEnabled()) { + LOG.debug("starting service : " + srvc.getName() + + ", for vertex: " + getName()); + } + srvc.start(); + } + } finally { + writeLock.unlock(); + } + } + + @Override + public void stopServices() { + Exception firstException = null; + List stoppedServices = new ArrayList<>(); + writeLock.lock(); + try { + if (servicesInited.get()) { + for (AbstractService srvc : services) { + LOG.debug("Stopping service : {}", srvc); + Exception ex = ServiceOperations.stopQuietly(srvc); + if (ex != null && 
firstException == null) { + LOG.warn(String.format( + "Failed to stop service=(%s) for vertex name=(%s)", + srvc.getName(), getName()), ex); + firstException = ex; + } else { + stoppedServices.add(srvc); + } + } + services.clear(); + } + servicesInited.set(false); + } finally { + writeLock.unlock(); + } + // wait for services to stop + for (AbstractService srvc : stoppedServices) { + srvc.waitForServiceToStop(60000L); + } + // After stopping all services, rethrow the first exception raised + if (firstException != null) { + throw ServiceStateException.convert(firstException); + } + } + public VertexImpl(TezVertexID vertexId, VertexPlan vertexPlan, String vertexName, Configuration dagConf, EventHandler eventHandler, TaskCommunicatorManagerInterface taskCommunicatorManagerInterface, Clock clock, @@ -869,7 +986,7 @@ public VertexImpl(TezVertexID vertexId, VertexPlan vertexPlan, StateChangeNotifier entityStatusTracker, Configuration dagOnlyConf) { this.vertexId = vertexId; this.vertexPlan = vertexPlan; - this.vertexName = StringInterner.weakIntern(vertexName); + this.vertexName = StringInterner.intern(vertexName); this.vertexConf = new Configuration(dagConf); this.vertexOnlyConf = new Configuration(dagOnlyConf); if (vertexPlan.hasVertexConf()) { @@ -963,11 +1080,11 @@ public VertexImpl(TezVertexID vertexId, VertexPlan vertexPlan, this.dagVertexGroups = dagVertexGroups; - isSpeculationEnabled = vertexConf.getBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, - TezConfiguration.TEZ_AM_SPECULATION_ENABLED_DEFAULT); - if (isSpeculationEnabled()) { - speculator = new LegacySpeculator(vertexConf, getAppContext(), this); - } + isSpeculationEnabled = + vertexConf.getBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, + TezConfiguration.TEZ_AM_SPECULATION_ENABLED_DEFAULT); + servicesInited = new AtomicBoolean(false); + initServices(); maxFailuresPercent = vertexConf.getFloat(TezConfiguration.TEZ_VERTEX_FAILURES_MAXPERCENT, TezConfiguration.TEZ_VERTEX_FAILURES_MAXPERCENT_DEFAULT); @@ -1048,7 +1165,9 @@ public VertexImpl(TezVertexID vertexId, VertexPlan vertexPlan, .append(", ContainerLauncher=").append(containerLauncherIdentifier).append(":").append(containerLauncherName) .append(", TaskCommunicator=").append(taskCommunicatorIdentifier).append(":").append(taskCommName); LOG.info(sb.toString()); - + cleanupShuffleDataAtVertexLevel = vertexConf.getInt(TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT, + TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT_DEFAULT) > 0 && + ShuffleUtils.isTezShuffleHandler(vertexConf); stateMachine = new StateMachineTez( stateMachineFactory.make(this), this); augmentStateMachine(); @@ -1189,7 +1308,8 @@ public TezCounters getAllCounters() { } TezCounters counters = new TezCounters(); - return incrTaskCounters(counters, tasks.values()); + counters.aggrAllCounters(this.counters); + return aggrTaskCounters(counters, tasks.values()); } finally { readLock.unlock(); @@ -1217,13 +1337,19 @@ public TezCounters getCachedCounters() { } TezCounters counters = new TezCounters(); - cachedCounters = incrTaskCounters(counters, tasks.values()); + counters.aggrAllCounters(this.counters); + cachedCounters = aggrTaskCounters(counters, tasks.values()); return cachedCounters; } finally { readLock.unlock(); } } - + + @Override + public void addCounters(final TezCounters tezCounters) { + counters.aggrAllCounters(tezCounters); + } + @Override public int getMaxTaskConcurrency() { return vertexConf.getInt(TezConfiguration.TEZ_AM_VERTEX_MAX_TASK_CONCURRENCY, @@ -1320,10 +1446,10 @@ boolean inTerminalState() 
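initServices()/startServices()/stopServices() above put the per-vertex speculator under Hadoop's service lifecycle. The stop path is the subtle part: it stops everything, remembers only the first failure, waits for clean shutdown, then rethrows. A reduced sketch against the org.apache.hadoop.service API, without the patch's write lock and AtomicBoolean guard:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.service.ServiceOperations;
import org.apache.hadoop.service.ServiceStateException;

class ServiceStopSketch {
  static void stopAll(List<AbstractService> services) {
    Exception first = null;
    List<AbstractService> stopped = new ArrayList<>();
    for (AbstractService s : services) {
      Exception e = ServiceOperations.stopQuietly(s); // logs, never throws
      if (e != null && first == null) {
        first = e;
      } else {
        stopped.add(s);
      }
    }
    for (AbstractService s : stopped) {
      s.waitForServiceToStop(60_000L); // bounded wait, as in the patch
    }
    if (first != null) {
      throw ServiceStateException.convert(first); // surface the first failure
    }
  }
}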
{ return false; } - public static TezCounters incrTaskCounters( + public static TezCounters aggrTaskCounters( TezCounters counters, Collection tasks) { for (Task task : tasks) { - counters.incrAllCounters(task.getCounters()); + counters.aggrAllCounters(task.getCounters()); } return counters; } @@ -1420,6 +1546,7 @@ public ProgressBuilder getVertexProgress() { progress.setKilledTaskCount(killedTaskCount); progress.setFailedTaskAttemptCount(failedTaskAttemptCount.get()); progress.setKilledTaskAttemptCount(killedTaskAttemptCount.get()); + progress.setRejectedTaskAttemptCount(rejectedTaskAttemptCount.get()); return progress; } finally { this.readLock.unlock(); } } @@ -1432,6 +1559,7 @@ public VertexStatusBuilder getVertexStatus( this.readLock.lock(); try { VertexStatusBuilder status = new VertexStatusBuilder(); + status.setId(getVertexId()); status.setState(getInternalState()); status.setDiagnostics(diagnostics); status.setProgress(getVertexProgress()); @@ -1462,20 +1590,31 @@ public TaskLocationHint getTaskLocationHint(TezTaskID taskId) { List getOnDemandRouteEvents() { return onDemandRouteEvents; } - + + /** + * Updates the progress value in the vertex. + * This should be called only when the vertex is in the running state. + * No need to acquire the lock since this is nested inside + * {@link #getProgress() getProgress} method. + */ private void computeProgress() { - this.readLock.lock(); - try { - float progress = 0f; - for (Task task : this.tasks.values()) { - progress += (task.getProgress()); - } - if (this.numTasks != 0) { - progress /= this.numTasks; + + float accProg = 0.0f; + int tasksCount = this.tasks.size(); + for (Task task : this.tasks.values()) { + float taskProg = task.getProgress(); + if (LOG.isDebugEnabled()) { + if (!ProgressHelper.isProgressWithinRange(taskProg)) { + LOG.debug("progress update: vertex={}, task={} incorrect; range={}", + getName(), task.getTaskID(), taskProg); + } } - this.progress = progress; - } finally { - this.readLock.unlock(); + accProg += ProgressHelper.processProgress(taskProg); + } + // if tasksCount is 0, do not reset the current progress + if (tasksCount > 0) { + // force the progress to stay within the [0, 1] range + progress = ProgressHelper.processProgress(accProg / tasksCount); } } @@ -1541,6 +1680,11 @@ public void incrementKilledTaskAttemptCount() { this.killedTaskAttemptCount.incrementAndGet(); } + @Override + public void incrementRejectedTaskAttemptCount() { + this.rejectedTaskAttemptCount.incrementAndGet(); + } + @Override public int getFailedTaskAttemptCount() { return this.failedTaskAttemptCount.get(); } @@ -1551,6 +1695,11 @@ public int getKilledTaskAttemptCount() { return this.killedTaskAttemptCount.get(); } + @Override + public int getRejectedTaskAttemptCount() { + return this.rejectedTaskAttemptCount.get(); + } + private void setTaskLocationHints(VertexLocationHint vertexLocationHint) { if (vertexLocationHint != null && vertexLocationHint.getTaskLocationHints() != null && @@ -1919,7 +2068,7 @@ public void doneReconfiguringVertex() { */ public void handle(VertexEvent event) { if (LOG.isDebugEnabled()) { - LOG.debug("Processing VertexEvent " + event.getVertexId() + LOG.debug("Processing VertexEvent " + event.getVertexID() + " of type " + event.getType() + " while in state " + getInternalState() + ". 
Event: " + event); } @@ -1981,7 +2130,7 @@ protected void addTask(Task task) { lazyTasksCopyNeeded = false; } } - tasks.put(task.getTaskId(), task); + tasks.put(task.getTaskID(), task); // TODO Metrics //metrics.waitingTask(task); } @@ -2031,7 +2180,7 @@ void logJobHistoryVertexFinishedEvent() throws IOException { || !recoveryData.isVertexSucceeded()) { logJobHistoryVertexCompletedHelper(VertexState.SUCCEEDED, finishTime, logSuccessDiagnostics ? StringUtils.join(getDiagnostics(), LINE_SEPARATOR) : "", - getAllCounters()); + constructFinalFullcounters()); } } @@ -2040,7 +2189,7 @@ void logJobHistoryVertexFailedEvent(VertexState state) throws IOException { || !recoveryData.isVertexFinished()) { TezCounters counters = null; try { - counters = getAllCounters(); + counters = constructFinalFullcounters(); } catch (LimitExceededException e) { // Ignore as failed vertex addDiagnostic("Counters limit exceeded: " + e.getMessage()); @@ -2127,7 +2276,7 @@ public Void run() throws Exception { }; ListenableFuture commitFuture = vertex.getAppContext().getExecService().submit(commitCallableEvent); - Futures.addCallback(commitFuture, commitCallableEvent.getCallback()); + Futures.addCallback(commitFuture, commitCallableEvent.getCallback(), GuavaShim.directExecutor()); vertex.commitFutures.put(outputName, commitFuture); } } @@ -2173,6 +2322,12 @@ static VertexState checkTasksForCompletion(final VertexImpl vertex) { if((vertexSucceeded || vertexFailuresBelowThreshold) && vertex.terminationCause == null) { if(vertexSucceeded) { LOG.info("All tasks have succeeded, vertex:" + vertex.logIdentifier); + if (vertex.cleanupShuffleDataAtVertexLevel) { + + for (Vertex v : vertex.vShuffleDeletionContext.getAncestors()) { + vertex.eventHandler.handle(new VertexShuffleDataDeletion(vertex, v)); + } + } } else { LOG.info("All tasks in the vertex " + vertex.logIdentifier + " have completed and the percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent); vertex.addDiagnostic("Vertex succeeded as percentage of failed tasks (failed/total) (" + vertex.failedTaskCount + "/" + vertex.numTasks + ") is less that the threshold of " + vertex.maxFailuresPercent); @@ -2277,7 +2432,7 @@ void tryEnactKill(VertexTerminationCause trigger, LOG.info(msg); for (Task task : tasks.values()) { eventHandler.handle( // attempt was terminated because the vertex is shutting down - new TaskEventTermination(task.getTaskId(), errCause, msg)); + new TaskEventTermination(task.getTaskID(), errCause, msg)); } } } @@ -2302,6 +2457,11 @@ VertexState finished(VertexState finalState, abortVertex(VertexStatus.State.valueOf(finalState.name())); eventHandler.handle(new DAGEvent(getDAGId(), DAGEventType.INTERNAL_ERROR)); + if (LOG.isDebugEnabled()) { + LOG.debug("stopping services attached to the succeeded Vertex," + + "name=" + getName()); + } + stopServices(); try { logJobHistoryVertexFailedEvent(finalState); } catch (IOException e) { @@ -2317,6 +2477,11 @@ VertexState finished(VertexState finalState, abortVertex(VertexStatus.State.valueOf(finalState.name())); eventHandler.handle(new DAGEventVertexCompleted(getVertexId(), finalState, terminationCause)); + if (LOG.isDebugEnabled()) { + LOG.debug("stopping services attached to the succeeded Vertex," + + "name=" + getName()); + } + stopServices(); try { logJobHistoryVertexFailedEvent(finalState); } catch (IOException e) { @@ -2329,6 +2494,12 @@ VertexState finished(VertexState finalState, 
logJobHistoryVertexFinishedEvent(); eventHandler.handle(new DAGEventVertexCompleted(getVertexId(), finalState)); + // Stop related services + if (LOG.isDebugEnabled()) { + LOG.debug("stopping services attached to the succeeded Vertex," + + "name=" + getName()); + } + stopServices(); } catch (LimitExceededException e) { LOG.error("Counter limits exceeded for vertex: " + getLogIdentifier(), e); finalState = VertexState.FAILED; @@ -2347,6 +2518,12 @@ VertexState finished(VertexState finalState, } break; default: + // Stop related services + if (LOG.isDebugEnabled()) { + LOG.debug("stopping services attached with Unexpected State," + + "name=" + getName()); + } + stopServices(); throw new TezUncheckedException("Unexpected VertexState: " + finalState); } return finalState; @@ -2367,11 +2544,8 @@ private void initializeCommitters() throws Exception { final RootInputLeafOutput od = entry.getValue(); if (od.getControllerDescriptor() == null || od.getControllerDescriptor().getClassName() == null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Ignoring committer as none specified for output=" - + outputName - + ", vertexId=" + logIdentifier); - } + LOG.debug("Ignoring committer as none specified for output={}, vertexId={}", + outputName, logIdentifier); continue; } LOG.info("Instantiating committer for output=" + outputName @@ -2386,25 +2560,21 @@ public Void run() throws Exception { appContext.getApplicationAttemptId().getAttemptId(), appContext.getCurrentDAG().getName(), vertexName, - od, - vertexId.getId()); + appContext.getCurrentDAG().getID().getId(), + vertexId.getId(), + od + ); OutputCommitter outputCommitter = ReflectionUtils .createClazzInstance(od.getControllerDescriptor().getClassName(), new Class[]{OutputCommitterContext.class}, new Object[]{outputCommitterContext}); - if (LOG.isDebugEnabled()) { - LOG.debug("Invoking committer init for output=" + outputName - + ", vertex=" + logIdentifier); - } + LOG.debug("Invoking committer init for output={}, vertex={}", outputName, logIdentifier); try { TezUtilsInternal.setHadoopCallerContext(appContext.getHadoopShim(), vertexId); outputCommitter.initialize(); outputCommitters.put(outputName, outputCommitter); - if (LOG.isDebugEnabled()) { - LOG.debug("Invoking committer setup for output=" + outputName - + ", vertex=" + logIdentifier); - } + LOG.debug("Invoking committer setup for output={}, vertex={}", outputName, logIdentifier); outputCommitter.setupOutput(); } finally { appContext.getHadoopShim().clearHadoopCallerContext(); @@ -2431,6 +2601,8 @@ private boolean initializeVertex() { } else { initedTime = clock.getTime(); } + // set the vertex services to be initialized. 
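initializeCommitters() above instantiates each OutputCommitter reflectively and then drives the two-step bring-up, initialize() followed by setupOutput(). The contract without the UGI doAs and Hadoop caller-context plumbing (hypothetical Committer interface; the patch uses Tez's ReflectionUtils rather than raw reflection):

class CommitterBringUp {
  interface Committer {
    void initialize() throws Exception;  // bind the committer to its output context
    void setupOutput() throws Exception; // e.g. create staging directories
  }

  static Committer bringUp(String className) throws Exception {
    Committer committer = (Committer) Class.forName(className)
        .getDeclaredConstructor().newInstance();
    committer.initialize();
    committer.setupOutput();
    return committer;
  }
}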
+ initServices(); // Only initialize committer when it is in non-recovery mode or vertex is not recovered to completed // state in recovery mode if (recoveryData == null || recoveryData.getVertexFinishedEvent() == null) { @@ -2501,7 +2673,7 @@ private void createTasks() { this.addTask(task); if(LOG.isDebugEnabled()) { LOG.debug("Created task for vertex " + logIdentifier + ": " + - task.getTaskId()); + task.getTaskID()); } } } @@ -2515,7 +2687,7 @@ private void addTasks(int newNumTasks) { this.numTasks++; if(LOG.isDebugEnabled()) { LOG.debug("Created task for vertex " + logIdentifier + ": " + - task.getTaskId()); + task.getTaskID()); } } } @@ -2578,7 +2750,7 @@ private VertexState setupVertex() { : rootInputDescriptors.values()) { if (input.getControllerDescriptor() != null && input.getControllerDescriptor().getClassName() != null) { - if (inputsWithInitializers == null) { + if (!hasInputInitializers()) { inputsWithInitializers = Sets.newHashSet(); } inputsWithInitializers.add(input.getName()); @@ -2599,7 +2771,7 @@ private VertexState setupVertex() { } } - if (hasBipartite && inputsWithInitializers != null) { + if (hasBipartite && hasInputInitializers()) { LOG.error("A vertex with an Initial Input and a Shuffle Input are not supported at the moment"); return finished(VertexState.FAILED); } @@ -2647,20 +2819,61 @@ private VertexState setupVertex() { return VertexState.INITED; } - private void assignVertexManager() throws TezException { + private boolean hasInputInitializers() { + return inputsWithInitializers != null; + } + + private boolean usesRootInputVertexManager() { + // RootInputVertexManager can start tasks even though any parents are not fully initialized. + if (vertexPlan.hasVertexManagerPlugin()) { + final VertexManagerPluginDescriptor pluginDesc = DagTypeConverters + .convertVertexManagerPluginDescriptorFromDAGPlan(vertexPlan.getVertexManagerPlugin()); + return pluginDesc.getClassName().equals(RootInputVertexManager.class.getName()); + } else { + // This case implicitly uses RootInputVertexManager. See VertexImpl#assignVertexManager + return hasInputInitializers(); + } + } + + private boolean isVertexInitSkippedInParentVertices() { + for (Map.Entry entry : sourceVertices.entrySet()) { + if(!(((VertexImpl) entry.getKey()).isVertexInitSkipped())) { + return false; + } + } + return true; + } + + private boolean canSkipInitialization() { // condition for skip initializing stage - // - VertexInputInitializerEvent is seen - // - VertexReconfigureDoneEvent is seen - // - Reason to check whether VertexManager has complete its responsibility + // - VertexInitializedEvent is seen + // - VertexConfigurationDoneEvent is seen + // - Reason to check whether VertexManager has completed its responsibility // VertexManager actually is involved in the InputInitializer (InputInitializer generate events // and send them to VertexManager which do some processing and send back to Vertex), so that means - // Input initializer will affect on the VertexManager and we couldn't skip the initializing step if + // Input initializer will affect on the VertexManager and we couldn't skip the initializing step if // VertexManager has not completed its responsibility. 
- // - Why using VertexReconfigureDoneEvent - // - VertexReconfigureDoneEvent represent the case that user use API reconfigureVertex - // VertexReconfigureDoneEvent will be logged - if (recoveryData != null - && recoveryData.shouldSkipInit()) { + // - Why using VertexConfigurationDoneEvent + // - VertexConfigurationDoneEvent represent the case that user use API reconfigureVertex + // VertexConfigurationDoneEvent will be logged + // - TaskStartEvent is seen in that vertex or setVertexParallelism is called + // - All the parent vertices have skipped initializing stage while recovering + // - Or RootInputVertexManager is used, which can start without waiting for parent vertices + if (recoveryData == null) { + return false; + } + if (!recoveryData.shouldSkipInit()) { + return false; + } + if (!recoveryData.isVertexTasksStarted() + && !recoveryData.getVertexConfigurationDoneEvent().isSetParallelismCalled()) { + return false; + } + return isVertexInitSkippedInParentVertices() || usesRootInputVertexManager(); + } + + private void assignVertexManager() throws TezException { + if (canSkipInitialization()) { // Replace the original VertexManager with NoOpVertexManager if the reconfiguration is done in the last AM attempt VertexConfigurationDoneEvent reconfigureDoneEvent = recoveryData.getVertexConfigurationDoneEvent(); if (LOG.isInfoEnabled()) { @@ -2670,14 +2883,17 @@ private void assignVertexManager() throws TezException { } NonSyncByteArrayOutputStream out = new NonSyncByteArrayOutputStream(); try { - reconfigureDoneEvent.toProtoStream(out); + CodedOutputStream codedOutputStream = CodedOutputStream.newInstance(out); + reconfigureDoneEvent.toProtoStream(codedOutputStream); + codedOutputStream.flush(); } catch (IOException e) { - throw new TezUncheckedException("Unable to deserilize VertexReconfigureDoneEvent"); + throw new TezUncheckedException("Unable to deserialize VertexReconfigureDoneEvent"); } this.vertexManager = new VertexManager( VertexManagerPluginDescriptor.create(NoOpVertexManager.class.getName()) .setUserPayload(UserPayload.create(ByteBuffer.wrap(out.toByteArray()))), dagUgi, this, appContext, stateChangeNotifier); + isVertexInitSkipped = true; return; } @@ -2721,7 +2937,7 @@ private void assignVertexManager() throws TezException { // If there is a one to one edge then we use the InputReadyVertexManager // If there is a scatter-gather edge then we use the ShuffleVertexManager // Else we use the default ImmediateStartVertexManager - if (inputsWithInitializers != null) { + if (hasInputInitializers()) { LOG.info("Setting vertexManager to RootInputVertexManager for " + logIdentifier); vertexManager = new VertexManager(RootInputVertexManager @@ -2755,7 +2971,7 @@ private static List getTaskAttemptIdentifiers(DAG dag, List attempts = new ArrayList(taIds.size()); String dagName = dag.getName(); for (TezTaskAttemptID taId : taIds) { - String vertexName = dag.getVertex(taId.getTaskID().getVertexID()).getName(); + String vertexName = dag.getVertex(taId.getVertexID()).getName(); attempts.add(getTaskAttemptIdentifier(dagName, vertexName, taId)); } return attempts; @@ -2896,7 +3112,7 @@ private VertexState handleInitEvent(VertexImpl vertex) { LOG.info("Num tasks is -1. 
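The serialization change above is easy to miss: CodedOutputStream buffers its writes, so without the added flush() the payload handed to NoOpVertexManager could come out empty or truncated. A minimal protobuf-java round-trip showing the same pattern:

import java.io.ByteArrayOutputStream;

import com.google.protobuf.CodedInputStream;
import com.google.protobuf.CodedOutputStream;

class CodedRoundTrip {
  public static void main(String[] args) throws Exception {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    CodedOutputStream out = CodedOutputStream.newInstance(bytes);
    out.writeInt32NoTag(42);
    out.flush(); // without this, data can stay in the internal buffer
    CodedInputStream in = CodedInputStream.newInstance(bytes.toByteArray());
    System.out.println(in.readInt32()); // prints 42
  }
}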
Expecting VertexManager/InputInitializers/1-1 split" + " to set #tasks for the vertex " + vertex.getLogIdentifier()); - if (vertex.inputsWithInitializers != null) { + if (vertex.hasInputInitializers()) { if (vertex.recoveryData == null || !vertex.recoveryData.shouldSkipInit()) { LOG.info("Vertex will initialize from input initializer. " + vertex.logIdentifier); try { @@ -2935,8 +3151,7 @@ private VertexState handleInitEvent(VertexImpl vertex) { LOG.info("Creating " + vertex.numTasks + " tasks for vertex: " + vertex.logIdentifier); vertex.createTasks(); // this block may return VertexState.INITIALIZING - if (vertex.inputsWithInitializers != null && - (vertex.recoveryData == null || !vertex.recoveryData.shouldSkipInit())) { + if (vertex.hasInputInitializers() && (vertex.recoveryData == null || !vertex.recoveryData.shouldSkipInit())) { LOG.info("Vertex will initialize from input initializer. " + vertex.logIdentifier); try { vertex.setupInputInitializerManager(); @@ -3287,6 +3502,12 @@ public Void run() { if (finishTime == 0) { setFinishTime(); } + // Stop related services + if (LOG.isDebugEnabled()) { + LOG.debug("stopping services attached to the aborted Vertex, name=" + + getName()); + } + stopServices(); } private void mayBeConstructFinalFullCounters() { @@ -3297,7 +3518,7 @@ private void mayBeConstructFinalFullCounters() { // Already constructed. Just return. return; } - this.constructFinalFullcounters(); + this.fullCounters = this.constructFinalFullcounters(); } } @@ -3306,15 +3527,17 @@ private VertexStatisticsImpl constructStatistics() { } @Private - public void constructFinalFullcounters() { - this.fullCounters = new TezCounters(); + public TezCounters constructFinalFullcounters() { + AggregateTezCounters aggregateTezCounters = new AggregateTezCounters(); + aggregateTezCounters.aggrAllCounters(counters); this.vertexStats = new VertexStats(); for (Task t : this.tasks.values()) { vertexStats.updateStats(t.getReport()); TezCounters counters = t.getCounters(); - this.fullCounters.incrAllCounters(counters); + aggregateTezCounters.aggrAllCounters(counters); } + return aggregateTezCounters; } private static class RootInputInitFailedTransition implements @@ -3467,7 +3690,7 @@ public VertexState transition(VertexImpl vertex, VertexEvent event) { TezTaskAttemptID taId = completionEvent.getTaskAttemptId(); vertex.vertexManager.onSourceTaskCompleted( getTaskAttemptIdentifier(vertex.dag.getName(), - vertex.dag.getVertex(taId.getTaskID().getVertexID()).getName(), + vertex.dag.getVertex(taId.getVertexID()).getName(), taId)); } catch (AMUserCodeException e) { String msg = "Exception in " + e.getSource() + ", vertex:" + vertex.getLogIdentifier(); @@ -3520,8 +3743,8 @@ public VertexState transition(VertexImpl vertex, VertexEvent event) { Task task = vertex.tasks.get(taskEvent.getTaskID()); if (taskEvent.getState() == TaskState.SUCCEEDED) { taskSucceeded(vertex, task); - if (!vertex.completedTasksStatsCache.containsTask(task.getTaskId())) { - vertex.completedTasksStatsCache.addTask(task.getTaskId()); + if (!vertex.completedTasksStatsCache.containsTask(task.getTaskID())) { + vertex.completedTasksStatsCache.addTask(task.getTaskID()); vertex.completedTasksStatsCache.mergeFrom(((TaskImpl) task).getStatistics()); } } else if (taskEvent.getState() == TaskState.FAILED) { @@ -3553,7 +3776,7 @@ private void taskSucceeded(VertexImpl vertex, Task task) { private void taskFailed(VertexImpl vertex, Task task) { vertex.failedTaskCount++; vertex.addDiagnostic("Task failed" - + ", taskId=" + task.getTaskId() + + ", 
taskId=" + task.getTaskID() + ", diagnostics=" + task.getDiagnostics()); // TODO Metrics //vertex.metrics.failedTask(task); @@ -3586,6 +3809,36 @@ public VertexState transition(VertexImpl vertex, VertexEvent event) { } } + private static class VertexShuffleDeleteTransition implements + SingleArcTransition { + + @Override + public void transition(VertexImpl vertex, VertexEvent event) { + int incompleteChildrenVertices = vertex.vShuffleDeletionContext.getIncompleteChildrenVertices(); + incompleteChildrenVertices = incompleteChildrenVertices - 1; + vertex.vShuffleDeletionContext.setIncompleteChildrenVertices(incompleteChildrenVertices); + // check if all the child vertices are completed + if (incompleteChildrenVertices == 0) { + LOG.info("Vertex shuffle data deletion for vertex name: " + + vertex.getName() + " with vertex id: " + vertex.getVertexId()); + // Get nodes of all the task attempts in vertex + Set nodes = Sets.newHashSet(); + Map tasksMap = vertex.getTasks(); + tasksMap.keySet().forEach(taskId -> { + Map taskAttemptMap = tasksMap.get(taskId).getAttempts(); + taskAttemptMap.keySet().forEach(attemptId -> { + nodes.add(taskAttemptMap.get(attemptId).getNodeId()); + }); + }); + vertex.appContext.getAppMaster().vertexComplete( + vertex.vertexId, nodes); + } else { + LOG.debug("The number of incomplete child vertex are {} for the vertex {}", + incompleteChildrenVertices, vertex.vertexId); + } + } + } + private static class TaskCompletedAfterVertexSuccessTransition implements MultipleArcTransition { @Override @@ -3662,7 +3915,7 @@ private void commitCompleted(VertexEventCommitCompleted commitCompletedEvent) { } else { String diag = "Commit failed for output:" + commitCompletedEvent.getOutputName() + ", vertexId=" + logIdentifier + ", " - + ExceptionUtils.getStackTrace(commitCompletedEvent.getException());; + + ExceptionUtils.getStackTrace(commitCompletedEvent.getException()); LOG.info(diag); addDiagnostic(diag); trySetTerminationCause(VertexTerminationCause.COMMIT_FAILURE); @@ -3875,6 +4128,17 @@ private void handleRoutedTezEvents(List tezEvents, boolean isPendingEv } EventMetaData sourceMeta = tezEvent.getSourceInfo(); switch(tezEvent.getEventType()) { + case CUSTOM_PROCESSOR_EVENT: + { + // set version as app attempt id + ((CustomProcessorEvent) tezEvent.getEvent()).setVersion( + appContext.getApplicationAttemptId().getAttemptId()); + // route event to task + EventMetaData destinationMeta = tezEvent.getDestinationInfo(); + Task targetTask = getTask(destinationMeta.getTaskAttemptID().getTaskID()); + targetTask.registerTezEvent(tezEvent); + } + break; case INPUT_FAILED_EVENT: case DATA_MOVEMENT_EVENT: case COMPOSITE_DATA_MOVEMENT_EVENT: @@ -3907,7 +4171,7 @@ private void handleRoutedTezEvents(List tezEvents, boolean isPendingEv pendingTaskEvents.add(tezEvent); } else { // event not from this vertex. must have come from source vertex. 
- int srcTaskIndex = sourceMeta.getTaskAttemptID().getTaskID().getId(); + int srcTaskIndex = sourceMeta.getTaskID().getId(); Vertex edgeVertex = getDAG().getVertex(sourceMeta.getTaskVertexName()); Edge srcEdge = sourceVertices.get(edgeVertex); if (srcEdge == null) { @@ -3949,7 +4213,7 @@ private void handleRoutedTezEvents(List tezEvents, boolean isPendingEv Preconditions.checkArgument(target != null, "Event sent to unkown vertex: " + vmEvent.getTargetVertexName()); TezTaskAttemptID srcTaId = sourceMeta.getTaskAttemptID(); - if (srcTaId.getTaskID().getVertexID().equals(vertexId)) { + if (srcTaId.getVertexID().equals(vertexId)) { // this is the producer tasks' vertex vmEvent.setProducerAttemptIdentifier( getTaskAttemptIdentifier(dag.getName(), getName(), srcTaId)); @@ -4052,11 +4316,9 @@ private static class InternalErrorTransition implements SingleArcTransition { @Override public void transition(VertexImpl vertex, VertexEvent event) { - LOG.error("Invalid event " + event.getType() + " on Vertex " - + vertex.getLogIdentifier()); - vertex.eventHandler.handle(new DAGEventDiagnosticsUpdate( - vertex.getDAGId(), "Invalid event " + event.getType() - + " on Vertex " + vertex.getLogIdentifier())); + String msg = "Invalid event on Vertex " + vertex.getLogIdentifier(); + LOG.error(msg); + vertex.eventHandler.handle(new DAGEventDiagnosticsUpdate(vertex.getDAGId(), msg)); vertex.setFinishTime(); vertex.trySetTerminationCause(VertexTerminationCause.INTERNAL_ERROR); vertex.cancelCommits(); @@ -4165,7 +4427,7 @@ public void setOutputVertices(Map outVertices) { addIO(vertex.getName()); } } finally { - writeLock.unlock();; + writeLock.unlock(); } } @@ -4284,12 +4546,7 @@ public int hashCode() { @Override public Map getInputVertices() { - readLock.lock(); - try { - return Collections.unmodifiableMap(this.sourceVertices); - } finally { - readLock.unlock(); - } + return Collections.unmodifiableMap(this.sourceVertices); } @Override @@ -4345,7 +4602,7 @@ public Resource getTaskResource() { } void addIO(String name) { - ioIndices.put(StringInterner.weakIntern(name), ioIndices.size()); + ioIndices.put(StringInterner.intern(name), ioIndices.size()); } @VisibleForTesting @@ -4490,6 +4747,11 @@ VertexManager getVertexManager() { return this.vertexManager; } + public boolean isVertexInitSkipped() { + return isVertexInitSkipped; + } + + private static void logLocationHints(String vertexName, VertexLocationHint locationHint) { if (locationHint == null) { @@ -4550,11 +4812,9 @@ public NoOpVertexManager(VertexManagerPluginContext context) { @Override public void initialize() throws Exception { - if (LOG.isDebugEnabled()) { - LOG.debug("initialize NoOpVertexManager"); - } + LOG.debug("initialize NoOpVertexManager"); configurationDoneEvent = new VertexConfigurationDoneEvent(); - configurationDoneEvent.fromProtoStream(new NonSyncByteArrayInputStream(getContext().getUserPayload().deepCopyAsArray())); + configurationDoneEvent.fromProtoStream(CodedInputStream.newInstance(getContext().getUserPayload().deepCopyAsArray())); String vertexName = getContext().getVertexName(); if (getContext().getVertexNumTasks(vertexName) == -1) { Preconditions.checkArgument(configurationDoneEvent.isSetParallelismCalled(), "SetParallelism must be called " @@ -4578,9 +4838,7 @@ public void onVertexStarted(List completions) } getContext().doneReconfiguringVertex(); int numTasks = getContext().getVertexNumTasks(getContext().getVertexName()); - if (LOG.isDebugEnabled()) { - LOG.debug("Schedule all the tasks, numTask=" + numTasks); - } + 
LOG.debug("Schedule all the tasks, numTask={}", numTasks); List tasks = new ArrayList(); for (int i=0;i> getDownstreamBlamingHosts(){ + return downstreamBlamingHosts; + } + + /** + * Initialize context from vertex shuffle deletion. + * @param deletionHeight + */ + public void initShuffleDeletionContext(int deletionHeight) { + VertexShuffleDataDeletionContext vShuffleDeletionContext = new VertexShuffleDataDeletionContext(deletionHeight); + vShuffleDeletionContext.setSpannedVertices(this); + this.vShuffleDeletionContext = vShuffleDeletionContext; } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java index b7d3428c6b..c3d49ea840 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexManager.java @@ -18,7 +18,7 @@ package org.apache.tez.dag.app.dag.impl; -import static com.google.common.base.Preconditions.checkNotNull; + import java.lang.reflect.UndeclaredThrowableException; import java.security.PrivilegedExceptionAction; @@ -31,11 +31,15 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.Objects; import javax.annotation.Nullable; import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.tez.dag.app.dag.event.DAGEventInternalError; +import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.runtime.api.events.CustomProcessorEvent; import org.apache.tez.runtime.api.impl.GroupInputSpec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,7 +78,8 @@ import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType; import com.google.common.base.Function; -import com.google.common.base.Preconditions; +import org.apache.tez.common.GuavaShim; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Collections2; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -221,7 +226,7 @@ public synchronized void scheduleTasks(List tasks) { @Override public synchronized void scheduleVertexTasks(List tasks) { checkAndThrowIfDone(); - List schedTasks = new ArrayList(tasks.size()); + List schedTasks = new ArrayList<>(tasks.size()); for (TaskWithLocationHint task : tasks) { schedTasks.add(ScheduleTaskRequest.create( task.getTaskIndex(), task.getTaskLocationHint())); @@ -272,11 +277,34 @@ public TezEvent apply(InputDataInformationEvent riEvent) { // Recovery handling is taken care of by the Vertex. 
     }
 
+    @Override
+    public void sendEventToProcessor(Collection<CustomProcessorEvent> events, int taskId) {
+      checkAndThrowIfDone();
+      Preconditions.checkArgument(taskId >= 0 && taskId < managedVertex.getTotalTasks(),
+          "Invalid taskId " + taskId + "; " + "There are " + managedVertex.getTotalTasks()
+          + " tasks in total.");
+
+      if (events != null && events.size() > 0) {
+        List<TezEvent> tezEvents = new ArrayList<>();
+        for (CustomProcessorEvent event : events) {
+          TezEvent tezEvent = new TezEvent(event, null);
+          // use a dummy task attempt id since this is not a task attempt specific event and the
+          // task attempt id won't be used anyway
+          EventMetaData destinationMeta = new EventMetaData(EventProducerConsumerType.PROCESSOR,
+              managedVertex.getName(), managedVertex.getName(),
+              TezTaskAttemptID.getInstance(managedVertex.getTask(taskId).getTaskID(), -1));
+          tezEvent.setDestinationInfo(destinationMeta);
+          tezEvents.add(tezEvent);
+        }
+        appContext.getEventHandler().handle(
+            new VertexEventRouteEvent(managedVertex.getVertexId(), tezEvents));
+      }
+    }
 
     @Override
     public synchronized void setVertexLocationHint(VertexLocationHint locationHint) {
       checkAndThrowIfDone();
-      Preconditions.checkNotNull(locationHint, "locationHint is null");
+      Objects.requireNonNull(locationHint, "locationHint is null");
       managedVertex.setVertexLocationHint(locationHint);
     }
@@ -384,10 +412,10 @@ public void onStateUpdated(VertexStateUpdate event) {
   public VertexManager(VertexManagerPluginDescriptor pluginDesc, UserGroupInformation dagUgi,
       Vertex managedVertex, AppContext appContext, StateChangeNotifier stateChangeNotifier)
       throws TezException {
-    checkNotNull(pluginDesc, "pluginDesc is null");
-    checkNotNull(managedVertex, "managedVertex is null");
-    checkNotNull(appContext, "appContext is null");
-    checkNotNull(stateChangeNotifier, "notifier is null");
+    Objects.requireNonNull(pluginDesc, "pluginDesc is null");
+    Objects.requireNonNull(managedVertex, "managedVertex is null");
+    Objects.requireNonNull(appContext, "appContext is null");
+    Objects.requireNonNull(stateChangeNotifier, "notifier is null");
     this.pluginDesc = pluginDesc;
     this.dagUgi = dagUgi;
     this.managedVertex = managedVertex;
@@ -397,7 +425,6 @@ public VertexManager(VertexManagerPluginDescriptor pluginDesc, UserGroupInformat
     this.rootInputInitEventQueue = new LinkedBlockingQueue<TezEvent>();
 
     pluginContext = new VertexManagerPluginContextImpl();
-    Preconditions.checkArgument(pluginDesc != null);
     payload = pluginDesc.getUserPayload();
     pluginFailed = new AtomicBoolean(false);
     plugin = ReflectionUtils.createClazzInstance(pluginDesc.getClassName(),
@@ -461,7 +488,7 @@ private void tryScheduleNextEvent() {
     VertexManagerEvent e = eventQueue.poll();
     if (e != null) {
       ListenableFuture future = execService.submit(e);
-      Futures.addCallback(future, e.getCallback());
+      Futures.addCallback(future, e.getCallback(), GuavaShim.directExecutor());
     } else {
       // This may happen. Lets say Callback succeeded on threadA.
It set eventInFlight to false // and called tryScheduleNextEvent() and found queue not empty but got paused before it @@ -544,7 +571,7 @@ private void sendInternalError(Exception e) { // state change must be triggered via an event transition LOG.error("Error after vertex manager callback " + managedVertex.getLogIdentifier(), e); appContext.getEventHandler().handle( - (new DAGEventInternalError(managedVertex.getVertexId().getDAGId(), + (new DAGEventInternalError(managedVertex.getVertexId().getDAGID(), "Error in VertexManager for vertex: " + managedVertex.getLogIdentifier() + ", error=" + ExceptionUtils.getStackTrace(e)))); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexShuffleDataDeletionContext.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexShuffleDataDeletionContext.java new file mode 100644 index 0000000000..4ffdf11231 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/VertexShuffleDataDeletionContext.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.app.dag.impl; + +import org.apache.tez.dag.app.dag.Vertex; + +import java.util.HashSet; +import java.util.Set; + +public class VertexShuffleDataDeletionContext { + private int deletionHeight; + private int incompleteChildrenVertices; + private Set ancestors; + private Set children; + + public VertexShuffleDataDeletionContext(int deletionHeight) { + this.deletionHeight = deletionHeight; + this.incompleteChildrenVertices = 0; + this.ancestors = new HashSet<>(); + this.children = new HashSet<>(); + } + + public void setSpannedVertices(Vertex vertex) { + getSpannedVerticesAncestors(vertex, ancestors, deletionHeight); + getSpannedVerticesChildren(vertex, children, deletionHeight); + setIncompleteChildrenVertices(children.size()); + } + + /** + * get all the ancestor vertices at a particular depth. + */ + private static void getSpannedVerticesAncestors(Vertex vertex, Set ancestorVertices, int level) { + if (level == 0) { + ancestorVertices.add(vertex); + return; + } + + if (level == 1) { + ancestorVertices.addAll(vertex.getInputVertices().keySet()); + return; + } + + vertex.getInputVertices().forEach((inVertex, edge) -> getSpannedVerticesAncestors(inVertex, ancestorVertices, + level - 1)); + } + + /** + * get all the child vertices at a particular depth. 
+   */
+  private static void getSpannedVerticesChildren(Vertex vertex, Set<Vertex> childVertices, int level) {
+    if (level == 0) {
+      childVertices.add(vertex);
+      return;
+    }
+
+    if (level == 1) {
+      childVertices.addAll(vertex.getOutputVertices().keySet());
+      return;
+    }
+
+    vertex.getOutputVertices().forEach((outVertex, edge) -> getSpannedVerticesChildren(outVertex, childVertices,
+        level - 1));
+  }
+
+  public void setIncompleteChildrenVertices(int incompleteChildrenVertices) {
+    this.incompleteChildrenVertices = incompleteChildrenVertices;
+  }
+
+  public int getIncompleteChildrenVertices() {
+    return this.incompleteChildrenVertices;
+  }
+
+  public Set<Vertex> getAncestors() {
+    return this.ancestors;
+  }
+
+  public Set<Vertex> getChildren() {
+    return this.children;
+  }
+}
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/DataStatistics.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/DataStatistics.java
index 7e6f1c2c82..bbfb950741 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/DataStatistics.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/DataStatistics.java
@@ -21,6 +21,11 @@
 import com.google.common.annotations.VisibleForTesting;
 
 public class DataStatistics {
+  /**
+   * z-value used to compute the 95% confidence interval.
+   */
+  private static final double DEFAULT_CI_FACTOR = 1.96;
+
   private int count = 0;
   private double sum = 0;
   private double sumSquares = 0;
@@ -79,8 +84,24 @@ public synchronized double count() {
     return count;
   }
 
+  /**
+   * Calculates the upper bound of the mean within the 95% confidence
+   * interval; 1.96 is the standard z-value for 95%.
+   *
+   * @return the mean plus the 95% confidence-interval margin.
+   */
+  public synchronized double meanCI() {
+    if (count <= 1) {
+      return 0.0;
+    }
+    double currMean = mean();
+    double currStd = std();
+    return currMean + (DEFAULT_CI_FACTOR * currStd / Math.sqrt(count));
+  }
+
   public String toString() {
-    return "DataStatistics: count is " + count + ", sum is " + sum +
-        ", sumSquares is " + sumSquares + " mean is " + mean() + " std() is " + std();
+    return "DataStatistics: count is " + count + ", sum is " + sum
+        + ", sumSquares is " + sumSquares + " mean is " + mean()
+        + " std() is " + std() + ", meanCI() is " + meanCI();
   }
 }
\ No newline at end of file
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/LegacySpeculator.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/LegacySpeculator.java
index dd54d8662b..940bb23025 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/LegacySpeculator.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/LegacySpeculator.java
@@ -18,12 +18,20 @@
 package org.apache.tez.dag.app.dag.speculation.legacy;
 
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 
+import com.google.common.annotations.VisibleForTesting;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.tez.common.ProgressHelper;
+import org.apache.tez.dag.api.TezConfiguration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.conf.Configuration;
@@ -39,8 +47,6 @@
 import org.apache.tez.dag.records.TezTaskAttemptID;
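
A worked example for DataStatistics.meanCI() above, with illustrative numbers: 25 completed task runtimes with mean 100 s and standard deviation 10 s give an upper bound of 100 + 1.96 * 10 / sqrt(25) = 103.92 s, which the estimator can use instead of the bare mean when sizing a fresh attempt:

public class MeanCIExample {
  public static void main(String[] args) {
    int count = 25;        // completed task runtimes observed
    double mean = 100.0;   // seconds
    double std = 10.0;     // seconds
    // Same formula as DataStatistics.meanCI(): mean + 1.96 * std / sqrt(count)
    double meanCI = mean + (1.96 * std / Math.sqrt(count));
    System.out.println(meanCI); // prints: 103.92
  }
}
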
import org.apache.tez.dag.records.TezTaskID; -import com.google.common.base.Preconditions; - /** * Maintains runtime estimation statistics. Makes periodic updates * estimates based on progress and decides on when to trigger a @@ -52,7 +58,7 @@ * because it may be likely a wasted attempt. There is a delay between * successive speculations. */ -public class LegacySpeculator { +public class LegacySpeculator extends AbstractService { private static final long ON_SCHEDULE = Long.MIN_VALUE; private static final long ALREADY_SPECULATING = Long.MIN_VALUE + 1; @@ -61,18 +67,19 @@ public class LegacySpeculator { private static final long NOT_RUNNING = Long.MIN_VALUE + 4; private static final long TOO_LATE_TO_SPECULATE = Long.MIN_VALUE + 5; - private static final long SOONEST_RETRY_AFTER_NO_SPECULATE = 1000L * 1L; - private static final long SOONEST_RETRY_AFTER_SPECULATE = 1000L * 15L; + private final long soonestRetryAfterNoSpeculate; + private final long soonestRetryAfterSpeculate; - private static final double PROPORTION_RUNNING_TASKS_SPECULATABLE = 0.1; - private static final double PROPORTION_TOTAL_TASKS_SPECULATABLE = 0.01; - private static final int MINIMUM_ALLOWED_SPECULATIVE_TASKS = 10; + private final double proportionRunningTasksSpeculatable; + private final double proportionTotalTasksSpeculatable; + private final int minimumAllowedSpeculativeTasks; + private static final int VERTEX_SIZE_THRESHOLD_FOR_TIMEOUT_SPECULATION = 1; private static final Logger LOG = LoggerFactory.getLogger(LegacySpeculator.class); private final ConcurrentMap runningTasks = new ConcurrentHashMap(); - + private ReadWriteLock lock = new ReentrantReadWriteLock(); // Used to track any TaskAttempts that aren't heart-beating for a while, so // that we can aggressively speculate instead of waiting for task-timeout. private final ConcurrentMap @@ -83,14 +90,29 @@ public class LegacySpeculator { // in progress. 
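
The hard-coded SOONEST_RETRY_* and PROPORTION_* constants above become per-AM configuration. A sketch of tuning them from client code follows; the key constants are the ones read by the LegacySpeculator constructor further below, and TEZ_AM_SPECULATION_ENABLED is the pre-existing switch that turns the speculator on:

import org.apache.tez.dag.api.TezConfiguration;

public class SpeculationTuning {
  public static void main(String[] args) {
    TezConfiguration conf = new TezConfiguration();
    conf.setBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, true);
    // Back off 1 s after a pass that launched nothing, 15 s after launching one.
    conf.setLong(TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_NO_SPECULATE, 1000L);
    conf.setLong(TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_SPECULATE, 15000L);
    // Allow at least 10 speculative tasks, or 1% of all tasks, whichever is larger.
    conf.setInt(TezConfiguration.TEZ_AM_MINIMUM_ALLOWED_SPECULATIVE_TASKS, 10);
    conf.setDouble(TezConfiguration.TEZ_AM_PROPORTION_TOTAL_TASKS_SPECULATABLE, 0.01);
  }
}
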
private static final long MAX_WAITTING_TIME_FOR_HEARTBEAT = 9 * 1000; - private final Set mayHaveSpeculated = new HashSet(); private Vertex vertex; private TaskRuntimeEstimator estimator; - + private final long taskTimeout; private final Clock clock; - private long nextSpeculateTime = Long.MIN_VALUE; + private Thread speculationBackgroundThread = null; + private volatile boolean stopped = false; + + @VisibleForTesting + public int getMinimumAllowedSpeculativeTasks() { return minimumAllowedSpeculativeTasks;} + + @VisibleForTesting + public double getProportionTotalTasksSpeculatable() { return proportionTotalTasksSpeculatable;} + + @VisibleForTesting + public double getProportionRunningTasksSpeculatable() { return proportionRunningTasksSpeculatable;} + + @VisibleForTesting + public long getSoonestRetryAfterNoSpeculate() { return soonestRetryAfterNoSpeculate;} + + @VisibleForTesting + public long getSoonestRetryAfterSpeculate() { return soonestRetryAfterSpeculate;} public LegacySpeculator(Configuration conf, AppContext context, Vertex vertex) { this(conf, context.getClock(), vertex); @@ -102,81 +124,174 @@ public LegacySpeculator(Configuration conf, Clock clock, Vertex vertex) { static private TaskRuntimeEstimator getEstimator (Configuration conf, Vertex vertex) { - TaskRuntimeEstimator estimator = new LegacyTaskRuntimeEstimator(); - estimator.contextualize(conf, vertex); - + TaskRuntimeEstimator estimator; + Class estimatorClass = + conf.getClass(TezConfiguration.TEZ_AM_TASK_ESTIMATOR_CLASS, + LegacyTaskRuntimeEstimator.class, + TaskRuntimeEstimator.class); + try { + Constructor estimatorConstructor + = estimatorClass.getConstructor(); + estimator = estimatorConstructor.newInstance(); + estimator.contextualize(conf, vertex); + } catch (NoSuchMethodException e) { + LOG.error("Can't make a speculation runtime estimator", e); + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + LOG.error("Can't make a speculation runtime estimator", e); + throw new RuntimeException(e); + } catch (InstantiationException e) { + LOG.error("Can't make a speculation runtime estimator", e); + throw new RuntimeException(e); + } catch (InvocationTargetException e) { + LOG.error("Can't make a speculation runtime estimator", e); + throw new RuntimeException(e); + } return estimator; } + @Override + protected void serviceStart() throws Exception { + lock.writeLock().lock(); + try { + assert (speculationBackgroundThread == null); + + if (speculationBackgroundThread == null) { + speculationBackgroundThread = + new Thread(createThread(), + "DefaultSpeculator background processing"); + speculationBackgroundThread.start(); + } + super.serviceStart(); + } catch (Exception e) { + LOG.warn("Speculator thread could not launch", e); + } finally { + lock.writeLock().unlock(); + } + } + + public boolean isStarted() { + boolean result = false; + lock.readLock().lock(); + try { + if (this.speculationBackgroundThread != null) { + result = getServiceState().equals(STATE.STARTED); + } + } finally { + lock.readLock().unlock(); + } + return result; + } + // This constructor is designed to be called by other constructors. // However, it's public because we do use it in the test cases. // Normally we figure out our own estimator. 
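
getEstimator above instantiates the configured estimator reflectively and handles the four reflective exceptions in separate, identical catch blocks. Since Java 7 they all share the ReflectiveOperationException parent, so an equivalent, more compact shape would be the following sketch (it assumes the surrounding file's imports and LOG):

  // Sketch only: same behavior as getEstimator above, with one catch instead of four.
  static private TaskRuntimeEstimator getEstimatorCompact(Configuration conf, Vertex vertex) {
    Class<? extends TaskRuntimeEstimator> estimatorClass =
        conf.getClass(TezConfiguration.TEZ_AM_TASK_ESTIMATOR_CLASS,
            LegacyTaskRuntimeEstimator.class,
            TaskRuntimeEstimator.class);
    try {
      TaskRuntimeEstimator estimator = estimatorClass.getConstructor().newInstance();
      estimator.contextualize(conf, vertex);
      return estimator;
    } catch (ReflectiveOperationException e) {
      // Covers NoSuchMethodException, InstantiationException,
      // IllegalAccessException and InvocationTargetException.
      LOG.error("Can't make a speculation runtime estimator", e);
      throw new RuntimeException(e);
    }
  }
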
public LegacySpeculator (Configuration conf, TaskRuntimeEstimator estimator, Clock clock, Vertex vertex) { + super(LegacySpeculator.class.getName()); this.vertex = vertex; this.estimator = estimator; this.clock = clock; + taskTimeout = conf.getLong( + TezConfiguration.TEZ_AM_LEGACY_SPECULATIVE_SINGLE_TASK_VERTEX_TIMEOUT, + TezConfiguration.TEZ_AM_LEGACY_SPECULATIVE_SINGLE_TASK_VERTEX_TIMEOUT_DEFAULT); + soonestRetryAfterNoSpeculate = conf.getLong( + TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_NO_SPECULATE, + TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_NO_SPECULATE_DEFAULT); + soonestRetryAfterSpeculate = conf.getLong( + TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_SPECULATE, + TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_SPECULATE_DEFAULT); + proportionRunningTasksSpeculatable = conf.getDouble( + TezConfiguration.TEZ_AM_PROPORTION_RUNNING_TASKS_SPECULATABLE, + TezConfiguration.TEZ_AM_PROPORTION_RUNNING_TASKS_SPECULATABLE_DEFAULT); + proportionTotalTasksSpeculatable = conf.getDouble( + TezConfiguration.TEZ_AM_PROPORTION_TOTAL_TASKS_SPECULATABLE, + TezConfiguration.TEZ_AM_PROPORTION_TOTAL_TASKS_SPECULATABLE_DEFAULT); + minimumAllowedSpeculativeTasks = conf.getInt( + TezConfiguration.TEZ_AM_MINIMUM_ALLOWED_SPECULATIVE_TASKS, + TezConfiguration.TEZ_AM_MINIMUM_ALLOWED_SPECULATIVE_TASKS_DEFAULT); } -/* ************************************************************* */ - - void maybeSpeculate() { - long now = clock.getTime(); - - if (now < nextSpeculateTime) { - return; - } - - int speculations = maybeScheduleASpeculation(); - long mininumRecomp - = speculations > 0 ? SOONEST_RETRY_AFTER_SPECULATE - : SOONEST_RETRY_AFTER_NO_SPECULATE; - - long wait = Math.max(mininumRecomp, - clock.getTime() - now); - nextSpeculateTime = now + wait; - - if (speculations > 0) { - LOG.info("We launched " + speculations - + " speculations. Waiting " + wait + " milliseconds."); + @Override + protected void serviceStop() throws Exception { + lock.writeLock().lock(); + try { + stopped = true; + // this could be called before background thread is established + if (speculationBackgroundThread != null) { + speculationBackgroundThread.interrupt(); + } + super.serviceStop(); + speculationBackgroundThread = null; + } finally { + lock.writeLock().unlock(); } } + public Runnable createThread() { + return new Runnable() { + @Override + public void run() { + while (!stopped && !Thread.currentThread().isInterrupted()) { + long backgroundRunStartTime = clock.getTime(); + try { + int speculations = computeSpeculations(); + long nextRecompTime = speculations > 0 ? soonestRetryAfterSpeculate + : soonestRetryAfterNoSpeculate; + long wait = Math.max(nextRecompTime, clock.getTime() - backgroundRunStartTime); + if (speculations > 0) { + LOG.info("We launched " + speculations + + " speculations. 
Waiting " + wait + " milliseconds before next evaluation."); + } else { + LOG.debug("Waiting {} milliseconds before next evaluation.", wait); + } + Thread.sleep(wait); + } catch (InterruptedException ie) { + if (!stopped) { + LOG.warn("Speculator thread interrupted", ie); + } + } + } + } + }; + } + /* ************************************************************* */ public void notifyAttemptStarted(TezTaskAttemptID taId, long timestamp) { estimator.enrollAttempt(taId, timestamp); } - - public void notifyAttemptStatusUpdate(TezTaskAttemptID taId, TaskAttemptState reportedState, + + public void notifyAttemptStatusUpdate(TezTaskAttemptID taId, + TaskAttemptState reportedState, long timestamp) { statusUpdate(taId, reportedState, timestamp); - maybeSpeculate(); } /** * Absorbs one TaskAttemptStatus * - * @param reportedStatus the status report that we got from a task attempt + * @param reportedState the status report that we got from a task attempt * that we want to fold into the speculation data for this job * @param timestamp the time this status corresponds to. This matters * because statuses contain progress. */ - private void statusUpdate(TezTaskAttemptID attemptID, TaskAttemptState reportedState, long timestamp) { + private void statusUpdate(TezTaskAttemptID attemptID, + TaskAttemptState reportedState, long timestamp) { TezTaskID taskID = attemptID.getTaskID(); Task task = vertex.getTask(taskID); - Preconditions.checkState(task != null, "Null task for attempt: " + attemptID); + if (task == null) { + return; + } estimator.updateAttempt(attemptID, reportedState, timestamp); - //if (stateString.equals(TaskAttemptState.RUNNING.name())) { if (reportedState == TaskAttemptState.RUNNING) { runningTasks.putIfAbsent(taskID, Boolean.TRUE); } else { runningTasks.remove(taskID, Boolean.TRUE); - //if (!stateString.equals(TaskAttemptState.STARTING.name())) { if (reportedState == TaskAttemptState.STARTING) { runningTaskAttemptStatistics.remove(attemptID); } @@ -209,37 +324,42 @@ public void handle(SpeculatorEvent event) { // // All of these values are negative. Any value that should be allowed to // speculate is 0 or positive. - private long speculationValue(Task task, long now) { + // + // If shouldUseTimeout is true, we will use timeout to decide on + // speculation instead of the task statistics. This can be useful, for + // example for single task vertices for which there are no tasks to compare + // with + private long speculationValue(Task task, long now, boolean shouldUseTimeout) { Map attempts = task.getAttempts(); - TezTaskID taskID = task.getTaskId(); + TezTaskID taskID = task.getTaskID(); long acceptableRuntime = Long.MIN_VALUE; long result = Long.MIN_VALUE; // short circuit completed tasks. 
no need to spend time on them if (task.getState() == TaskState.SUCCEEDED) { + // remove the task from may have speculated if it exists + mayHaveSpeculated.remove(taskID); return NOT_RUNNING; } - - if (!mayHaveSpeculated.contains(taskID)) { + + if (!mayHaveSpeculated.contains(taskID) && !shouldUseTimeout) { acceptableRuntime = estimator.thresholdRuntime(taskID); if (acceptableRuntime == Long.MAX_VALUE) { return ON_SCHEDULE; } } - TezTaskAttemptID runningTaskAttemptID = null; - + TezTaskAttemptID runningTaskAttemptID; int numberRunningAttempts = 0; for (TaskAttempt taskAttempt : attempts.values()) { - if (taskAttempt.getState() == TaskAttemptState.RUNNING - || taskAttempt.getState() == TaskAttemptState.STARTING) { + TaskAttemptState taskAttemptState = taskAttempt.getState(); + if (taskAttemptState == TaskAttemptState.RUNNING + || taskAttemptState == TaskAttemptState.STARTING) { if (++numberRunningAttempts > 1) { return ALREADY_SPECULATING; } - runningTaskAttemptID = taskAttempt.getID(); - - long estimatedRunTime = estimator.estimatedRuntime(runningTaskAttemptID); + runningTaskAttemptID = taskAttempt.getTaskAttemptID(); long taskAttemptStartTime = estimator.attemptEnrolledTime(runningTaskAttemptID); @@ -249,43 +369,62 @@ private long speculationValue(Task task, long now) { return TOO_NEW; } - long estimatedEndTime = estimatedRunTime + taskAttemptStartTime; + if (shouldUseTimeout) { + if ((now - taskAttemptStartTime) > taskTimeout) { + // If the task has timed out, then we want to schedule a speculation + // immediately. However we cannot return immediately since we may + // already have a speculation running. + result = Long.MAX_VALUE; + } else { + // Task has not timed out so we are good + return ON_SCHEDULE; + } + } else { + long estimatedRunTime = estimator + .estimatedRuntime(runningTaskAttemptID); - long estimatedReplacementEndTime - = now + estimator.newAttemptEstimatedRuntime(); + long estimatedEndTime = estimatedRunTime + taskAttemptStartTime; - float progress = taskAttempt.getProgress(); - TaskAttemptHistoryStatistics data = - runningTaskAttemptStatistics.get(runningTaskAttemptID); - if (data == null) { - runningTaskAttemptStatistics.put(runningTaskAttemptID, - new TaskAttemptHistoryStatistics(estimatedRunTime, progress, now)); - } else { - if (estimatedRunTime == data.getEstimatedRunTime() - && progress == data.getProgress()) { - // Previous stats are same as same stats - if (data.notHeartbeatedInAWhile(now)) { - // Stats have stagnated for a while, simulate heart-beat. - // Now simulate the heart-beat - statusUpdate(taskAttempt.getID(), taskAttempt.getState(), clock.getTime()); - } + long estimatedReplacementEndTime + = now + estimator.newAttemptEstimatedRuntime(); + + float progress = taskAttempt.getProgress(); + TaskAttemptHistoryStatistics data = + runningTaskAttemptStatistics.get(runningTaskAttemptID); + if (data == null) { + runningTaskAttemptStatistics.put(runningTaskAttemptID, + new TaskAttemptHistoryStatistics(estimatedRunTime, progress, + now)); } else { - // Stats have changed - update our data structure - data.setEstimatedRunTime(estimatedRunTime); - data.setProgress(progress); - data.resetHeartBeatTime(now); + if (estimatedRunTime == data.getEstimatedRunTime() + && progress == data.getProgress()) { + // Previous stats are same as same stats + if (data.notHeartbeatedInAWhile(now) + || estimator + .hasStagnatedProgress(runningTaskAttemptID, now)) { + // Stats have stagnated for a while, simulate heart-beat. 
+ // Now simulate the heart-beat + statusUpdate(taskAttempt.getTaskAttemptID(), taskAttempt.getState(), + clock.getTime()); + } + } else { + // Stats have changed - update our data structure + data.setEstimatedRunTime(estimatedRunTime); + data.setProgress(progress); + data.resetHeartBeatTime(now); + } } - } - if (estimatedEndTime < now) { - return PROGRESS_IS_GOOD; - } + if (estimatedEndTime < now) { + return PROGRESS_IS_GOOD; + } - if (estimatedReplacementEndTime >= estimatedEndTime) { - return TOO_LATE_TO_SPECULATE; - } + if (estimatedReplacementEndTime >= estimatedEndTime) { + return TOO_LATE_TO_SPECULATE; + } - result = estimatedEndTime - estimatedReplacementEndTime; + result = estimatedEndTime - estimatedReplacementEndTime; + } } } @@ -294,9 +433,7 @@ private long speculationValue(Task task, long now) { return NOT_RUNNING; } - - - if (acceptableRuntime == Long.MIN_VALUE) { + if ((acceptableRuntime == Long.MIN_VALUE) && !shouldUseTimeout) { acceptableRuntime = estimator.thresholdRuntime(taskID); if (acceptableRuntime == Long.MAX_VALUE) { return ON_SCHEDULE; @@ -306,14 +443,15 @@ private long speculationValue(Task task, long now) { return result; } - //Add attempt to a given Task. + // Add attempt to a given Task. protected void addSpeculativeAttempt(TezTaskID taskID) { - LOG.info("DefaultSpeculator.addSpeculativeAttempt -- we are speculating " + taskID); + LOG.info("DefaultSpeculator.addSpeculativeAttempt -- we are speculating " + + taskID); vertex.scheduleSpeculativeTask(taskID); mayHaveSpeculated.add(taskID); } - private int maybeScheduleASpeculation() { + int computeSpeculations() { int successes = 0; long now = clock.getTime(); @@ -324,16 +462,19 @@ private int maybeScheduleASpeculation() { Map tasks = vertex.getTasks(); int numberAllowedSpeculativeTasks - = (int) Math.max(MINIMUM_ALLOWED_SPECULATIVE_TASKS, - PROPORTION_TOTAL_TASKS_SPECULATABLE * tasks.size()); - + = (int) Math.max(minimumAllowedSpeculativeTasks, + proportionTotalTasksSpeculatable * tasks.size()); TezTaskID bestTaskID = null; long bestSpeculationValue = -1L; + boolean shouldUseTimeout = + (tasks.size() <= VERTEX_SIZE_THRESHOLD_FOR_TIMEOUT_SPECULATION) && + (taskTimeout >= 0); // this loop is potentially pricey. 
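
To make the arithmetic in speculationValue() above concrete, a worked example with illustrative numbers: a running attempt enrolled at t = 10 s that the estimator forecasts to need 200 s in total, evaluated at t = 100 s, against a fresh-attempt forecast of 60 s:

public class SpeculationValueExample {
  public static void main(String[] args) {
    long now = 100_000L;                         // current clock time (ms)
    long taskAttemptStartTime = 10_000L;         // when the attempt was enrolled
    long estimatedRunTime = 200_000L;            // estimator forecast for the running attempt
    long newAttemptEstimatedRuntime = 60_000L;   // forecast for a fresh attempt

    long estimatedEndTime = estimatedRunTime + taskAttemptStartTime;      // 210,000
    long estimatedReplacementEndTime = now + newAttemptEstimatedRuntime;  // 160,000

    // estimatedEndTime is not in the past (so progress is not "good") and the
    // replacement would finish 50 s earlier, so the task is a speculation
    // candidate with this positive value:
    long speculationValue = estimatedEndTime - estimatedReplacementEndTime; // 50,000
    System.out.println(speculationValue);
  }
}
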
// TODO track the tasks that are potentially worth looking at for (Map.Entry taskEntry : tasks.entrySet()) { - long mySpeculationValue = speculationValue(taskEntry.getValue(), now); + long mySpeculationValue = speculationValue(taskEntry.getValue(), now, + shouldUseTimeout); if (mySpeculationValue == ALREADY_SPECULATING) { ++numberSpeculationsAlready; @@ -350,7 +491,7 @@ private int maybeScheduleASpeculation() { } numberAllowedSpeculativeTasks = (int) Math.max(numberAllowedSpeculativeTasks, - PROPORTION_RUNNING_TASKS_SPECULATABLE * numberRunningTasks); + proportionRunningTasksSpeculatable * numberRunningTasks); // If we found a speculation target, fire it off if (bestTaskID != null @@ -358,7 +499,6 @@ private int maybeScheduleASpeculation() { addSpeculativeAttempt(bestTaskID); ++successes; } - return successes; } @@ -388,6 +528,12 @@ public void setEstimatedRunTime(long estimatedRunTime) { } public void setProgress(float progress) { + if (LOG.isDebugEnabled()) { + if (!ProgressHelper.isProgressWithinRange(progress)) { + LOG.debug("Progress update: speculator received progress in invalid " + + "range={}", progress); + } + } this.progress = progress; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/SimpleExponentialTaskRuntimeEstimator.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/SimpleExponentialTaskRuntimeEstimator.java new file mode 100644 index 0000000000..b61f153e6b --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/SimpleExponentialTaskRuntimeEstimator.java @@ -0,0 +1,194 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.dag.app.dag.speculation.legacy; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.api.oldrecords.TaskAttemptState; +import org.apache.tez.dag.app.dag.Task; +import org.apache.tez.dag.app.dag.TaskAttempt; +import org.apache.tez.dag.app.dag.Vertex; +import org.apache.tez.dag.app.dag.speculation.legacy.forecast.SimpleExponentialSmoothing; +import org.apache.tez.dag.records.TezTaskAttemptID; + +/** + * A task Runtime Estimator based on exponential smoothing. + */ +public class SimpleExponentialTaskRuntimeEstimator extends StartEndTimesBase { + /** + * The default value returned by the estimator when no records exist. + */ + private static final long DEFAULT_ESTIMATE_RUNTIME = -1L; + + /** + * Given a forecast of value 0.0, it is getting replaced by the default value + * to avoid division by 0. 
+ */ + private static final double DEFAULT_PROGRESS_VALUE = 1E-10; + + /** + * Factor used to calculate the confidence interval. + */ + private static final double CONFIDENCE_INTERVAL_FACTOR = 0.25; + /** + * Constant time used to calculate the smoothing exponential factor. + */ + private long constTime; + + /** + * Number of readings before we consider the estimate stable. + * Otherwise, the estimate will be skewed due to the initial estimate + */ + private int skipCount; + + /** + * Time window to automatically update the count of the skipCount. This is + * needed when a task stalls without any progress, causing the estimator to + * return -1 as an estimatedRuntime. + */ + private long stagnatedWindow; + + /** + * A map of TA Id to the statistic model of smooth exponential. + */ + private final ConcurrentMap> + estimates = new ConcurrentHashMap<>(); + + private SimpleExponentialSmoothing getForecastEntry( + final TezTaskAttemptID attemptID) { + AtomicReference entryRef = estimates + .get(attemptID); + if (entryRef == null) { + return null; + } + return entryRef.get(); + } + + private void incorporateReading(final TezTaskAttemptID attemptID, + final float newRawData, final long newTimeStamp) { + SimpleExponentialSmoothing foreCastEntry = getForecastEntry(attemptID); + if (foreCastEntry == null) { + Long tStartTime = startTimes.get(attemptID); + // skip if the startTime is not set yet + if (tStartTime == null) { + return; + } + estimates.putIfAbsent(attemptID, + new AtomicReference<>(SimpleExponentialSmoothing.createForecast( + constTime, skipCount, stagnatedWindow, + tStartTime - 1))); + incorporateReading(attemptID, newRawData, newTimeStamp); + return; + } + foreCastEntry.incorporateReading(newTimeStamp, newRawData); + } + + @Override + public void contextualize(final Configuration conf, final Vertex vertex) { + super.contextualize(conf, vertex); + + constTime + = conf.getLong(TezConfiguration.TEZ_AM_ESTIMATOR_EXPONENTIAL_LAMBDA_MS, + TezConfiguration.TEZ_AM_ESTIMATOR_EXPONENTIAL_LAMBDA_MS_DEFAULT); + + stagnatedWindow = Math.max(2 * constTime, conf.getLong( + TezConfiguration.TEZ_AM_ESTIMATOR_EXPONENTIAL_STAGNATED_MS, + TezConfiguration.TEZ_AM_ESTIMATOR_EXPONENTIAL_STAGNATED_MS_DEFAULT)); + + skipCount = conf + .getInt(TezConfiguration.TEZ_AM_ESTIMATOR_EXPONENTIAL_SKIP_INITIALS, + TezConfiguration + .TEZ_AM_ESTIMATOR_EXPONENTIAL_SKIP_INITIALS_DEFAULT); + } + + @Override + public long estimatedRuntime(final TezTaskAttemptID id) { + SimpleExponentialSmoothing foreCastEntry = getForecastEntry(id); + if (foreCastEntry == null) { + return DEFAULT_ESTIMATE_RUNTIME; + } + double remainingWork = + Math.max(0.0, Math.min(1.0, 1.0 - foreCastEntry.getRawData())); + double forecast = + Math.max(DEFAULT_PROGRESS_VALUE, foreCastEntry.getForecast()); + long remainingTime = (long) (remainingWork / forecast); + long estimatedRuntime = + remainingTime + foreCastEntry.getTimeStamp() - foreCastEntry.getStartTime(); + return estimatedRuntime; + } + + @Override + public long newAttemptEstimatedRuntime() { + if (taskStatistics == null) { + return DEFAULT_ESTIMATE_RUNTIME; + } + + double statsMeanCI = taskStatistics.meanCI(); + double expectedVal = + statsMeanCI + Math.min(statsMeanCI * CONFIDENCE_INTERVAL_FACTOR, + taskStatistics.std() / 2); + return (long) (expectedVal); + } + + @Override + public boolean hasStagnatedProgress(final TezTaskAttemptID id, + final long timeStamp) { + SimpleExponentialSmoothing foreCastEntry = getForecastEntry(id); + if (foreCastEntry == null) { + return false; + } + return 
foreCastEntry.isDataStagnated(timeStamp); + } + + @Override + public long runtimeEstimateVariance(final TezTaskAttemptID id) { + SimpleExponentialSmoothing forecastEntry = getForecastEntry(id); + if (forecastEntry == null) { + return DEFAULT_ESTIMATE_RUNTIME; + } + double forecast = forecastEntry.getForecast(); + if (forecastEntry.isDefaultForecast(forecast)) { + return DEFAULT_ESTIMATE_RUNTIME; + } + //TODO What is the best way to measure variance in runtime + return 0L; + } + + + @Override + public void updateAttempt(final TezTaskAttemptID attemptID, + final TaskAttemptState state, + final long timestamp) { + super.updateAttempt(attemptID, state, timestamp); + Task task = vertex.getTask(attemptID.getTaskID()); + if (task == null) { + return; + } + TaskAttempt taskAttempt = task.getAttempt(attemptID); + if (taskAttempt == null) { + return; + } + float progress = taskAttempt.getProgress(); + incorporateReading(attemptID, progress, timestamp); + } +} + diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/StartEndTimesBase.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/StartEndTimesBase.java index d4d1a7ff2c..3083986d9c 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/StartEndTimesBase.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/StartEndTimesBase.java @@ -35,13 +35,11 @@ /** * Base class that uses the attempt runtime estimations from a derived class * and uses it to determine outliers based on deviating beyond the mean - * estimated runtime by some threshold + * estimated runtime by some threshold. */ abstract class StartEndTimesBase implements TaskRuntimeEstimator { - static final float MINIMUM_COMPLETE_PROPORTION_TO_SPECULATE - = 0.05F; - static final int MINIMUM_COMPLETE_NUMBER_TO_SPECULATE - = 1; + static final float MINIMUM_COMPLETE_PROPORTION_TO_SPECULATE = 0.05F; + static final int MINIMUM_COMPLETE_NUMBER_TO_SPECULATE = 1; protected Vertex vertex; @@ -50,56 +48,58 @@ abstract class StartEndTimesBase implements TaskRuntimeEstimator { protected final DataStatistics taskStatistics = new DataStatistics(); - private float slowTaskRelativeTresholds; + private float slowTaskRelativeThresholds; protected final Set doneTasks = new HashSet(); @Override - public void enrollAttempt(TezTaskAttemptID id, long timestamp) { + public void enrollAttempt(final TezTaskAttemptID id, final long timestamp) { startTimes.put(id, timestamp); } @Override - public long attemptEnrolledTime(TezTaskAttemptID attemptID) { + public long attemptEnrolledTime(final TezTaskAttemptID attemptID) { Long result = startTimes.get(attemptID); return result == null ? 
Long.MAX_VALUE : result; } @Override - public void contextualize(Configuration conf, Vertex vertex) { - slowTaskRelativeTresholds = conf.getFloat( + public void contextualize(final Configuration conf, final Vertex vertexP) { + slowTaskRelativeThresholds = conf.getFloat( TezConfiguration.TEZ_AM_LEGACY_SPECULATIVE_SLOWTASK_THRESHOLD, 1.0f); - this.vertex = vertex; + this.vertex = vertexP; } - protected DataStatistics dataStatisticsForTask(TezTaskID taskID) { + protected DataStatistics dataStatisticsForTask(final TezTaskID taskID) { return taskStatistics; } @Override - public long thresholdRuntime(TezTaskID taskID) { + public long thresholdRuntime(final TezTaskID taskID) { int completedTasks = vertex.getCompletedTasks(); int totalTasks = vertex.getTotalTasks(); - + if (completedTasks < MINIMUM_COMPLETE_NUMBER_TO_SPECULATE - || (((float)completedTasks) / totalTasks) - < MINIMUM_COMPLETE_PROPORTION_TO_SPECULATE ) { + || (((float) completedTasks) / totalTasks) + < MINIMUM_COMPLETE_PROPORTION_TO_SPECULATE) { return Long.MAX_VALUE; } - - long result = (long)taskStatistics.outlier(slowTaskRelativeTresholds); + + long result = (long) taskStatistics.outlier(slowTaskRelativeThresholds); return result; } @Override public long newAttemptEstimatedRuntime() { - return (long)taskStatistics.mean(); + return (long) taskStatistics.mean(); } @Override - public void updateAttempt(TezTaskAttemptID attemptID, TaskAttemptState state, long timestamp) { + public void updateAttempt(final TezTaskAttemptID attemptID, + final TaskAttemptState state, + final long timestamp) { Task task = vertex.getTask(attemptID.getTaskID()); @@ -109,7 +109,7 @@ public void updateAttempt(TezTaskAttemptID attemptID, TaskAttemptState state, lo Long boxedStart = startTimes.get(attemptID); long start = boxedStart == null ? Long.MIN_VALUE : boxedStart; - + TaskAttempt taskAttempt = task.getAttempt(attemptID); if (taskAttempt.getState() == TaskAttemptState.SUCCEEDED) { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/TaskRuntimeEstimator.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/TaskRuntimeEstimator.java index c8edd1eac4..4f747afc4e 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/TaskRuntimeEstimator.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/TaskRuntimeEstimator.java @@ -29,13 +29,14 @@ * */ public interface TaskRuntimeEstimator { - public void enrollAttempt(TezTaskAttemptID id, long timestamp); + void enrollAttempt(TezTaskAttemptID id, long timestamp); - public long attemptEnrolledTime(TezTaskAttemptID attemptID); + long attemptEnrolledTime(TezTaskAttemptID attemptID); - public void updateAttempt(TezTaskAttemptID taId, TaskAttemptState reportedState, long timestamp); + void updateAttempt(TezTaskAttemptID taId, + TaskAttemptState reportedState, long timestamp); - public void contextualize(Configuration conf, Vertex vertex); + void contextualize(Configuration conf, Vertex vertex); /** * @@ -52,7 +53,7 @@ public interface TaskRuntimeEstimator { * however long. * */ - public long thresholdRuntime(TezTaskID id); + long thresholdRuntime(TezTaskID id); /** * @@ -64,7 +65,7 @@ public interface TaskRuntimeEstimator { * we don't have enough information yet to produce an estimate. * */ - public long estimatedRuntime(TezTaskAttemptID id); + long estimatedRuntime(TezTaskAttemptID id); /** * @@ -75,7 +76,7 @@ public interface TaskRuntimeEstimator { * we don't have enough information yet to produce an estimate. 
* */ - public long newAttemptEstimatedRuntime(); + long newAttemptEstimatedRuntime(); /** * @@ -87,5 +88,20 @@ public interface TaskRuntimeEstimator { * we don't have enough information yet to produce an estimate. * */ - public long runtimeEstimateVariance(TezTaskAttemptID id); + long runtimeEstimateVariance(TezTaskAttemptID id); + + /** + * + * Returns true if the estimator has no updates records for a threshold time + * window. This helps to identify task attempts that are stalled at the + * beginning of execution. + * + * @param id the {@link TezTaskAttemptID} of the attempt we are asking about + * @param timeStamp the time of the report we compare with + * @return true if the task attempt has no progress for a given time window + * + */ + default boolean hasStagnatedProgress(TezTaskAttemptID id, long timeStamp) { + return false; + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/forecast/SimpleExponentialSmoothing.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/forecast/SimpleExponentialSmoothing.java new file mode 100644 index 0000000000..e7b7dcd57c --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/forecast/SimpleExponentialSmoothing.java @@ -0,0 +1,336 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.app.dag.speculation.legacy.forecast; + +import java.util.concurrent.atomic.AtomicReference; + +/** + * Implementation of the static model for Simple exponential smoothing. + */ +public class SimpleExponentialSmoothing { + private static final double DEFAULT_FORECAST = -1.0d; + private final int kMinimumReads; + private final long kStagnatedWindow; + private final long startTime; + private long timeConstant; + + /** + * Holds reference to the current forecast record. + */ + private AtomicReference forecastRefEntry; + + /** + * Create forecast simple exponential smoothing. + * + * @param timeConstant the time constant + * @param skipCnt the skip cnt + * @param stagnatedWindow the stagnated window + * @param timeStamp the time stamp + * @return the simple exponential smoothing + */ + public static SimpleExponentialSmoothing createForecast( + final long timeConstant, + final int skipCnt, final long stagnatedWindow, final long timeStamp) { + return new SimpleExponentialSmoothing(timeConstant, skipCnt, + stagnatedWindow, timeStamp); + } + + /** + * Instantiates a new Simple exponential smoothing. 
+ * + * @param ktConstant the kt constant + * @param skipCnt the skip cnt + * @param stagnatedWindow the stagnated window + * @param timeStamp the time stamp + */ + SimpleExponentialSmoothing(final long ktConstant, final int skipCnt, + final long stagnatedWindow, final long timeStamp) { + this.kMinimumReads = skipCnt; + this.kStagnatedWindow = stagnatedWindow; + this.timeConstant = ktConstant; + this.startTime = timeStamp; + this.forecastRefEntry = new AtomicReference(null); + } + + private class ForecastRecord { + private final double alpha; + private final long timeStamp; + private final double sample; + private final double rawData; + private double forecast; + private final double sseError; + private final long myIndex; + private ForecastRecord prevRec; + + /** + * Instantiates a new Forecast record. + * + * @param currForecast the curr forecast + * @param currRawData the curr raw data + * @param currTimeStamp the curr time stamp + */ + ForecastRecord(final double currForecast, final double currRawData, + final long currTimeStamp) { + this(0.0, currForecast, currRawData, currForecast, currTimeStamp, 0.0, 0); + } + + /** + * Instantiates a new Forecast record. + * + * @param alphaVal the alpha val + * @param currSample the curr sample + * @param currRawData the curr raw data + * @param currForecast the curr forecast + * @param currTimeStamp the curr time stamp + * @param accError the acc error + * @param index the index + */ + ForecastRecord(final double alphaVal, final double currSample, + final double currRawData, + final double currForecast, final long currTimeStamp, + final double accError, + final long index) { + this.timeStamp = currTimeStamp; + this.alpha = alphaVal; + this.sample = currSample; + this.forecast = currForecast; + this.rawData = currRawData; + this.sseError = accError; + this.myIndex = index; + } + + private ForecastRecord createForecastRecord(final double alphaVal, + final double currSample, + final double currRawData, + final double currForecast, final long currTimeStamp, + final double accError, + final long index, + final ForecastRecord prev) { + ForecastRecord forecastRec = + new ForecastRecord(alphaVal, currSample, currRawData, currForecast, + currTimeStamp, accError, index); + forecastRec.prevRec = prev; + return forecastRec; + } + + private double preProcessRawData(final double rData, final long newTime) { + return processRawData(this.rawData, this.timeStamp, rData, newTime); + } + + /** + * Append forecast record. + * + * @param newTimeStamp the new time stamp + * @param rData the r data + * @return the forecast record + */ + public ForecastRecord append(final long newTimeStamp, final double rData) { + if (this.timeStamp >= newTimeStamp + && Double.compare(this.rawData, rData) >= 0) { + // progress reported twice. Do nothing. 
+ return this; + } + ForecastRecord refRecord = this; + if (newTimeStamp == this.timeStamp) { + // we need to restore old value if possible + if (this.prevRec != null) { + refRecord = this.prevRec; + } + } + double newSample = refRecord.preProcessRawData(rData, newTimeStamp); + long deltaTime = this.timeStamp - newTimeStamp; + if (refRecord.myIndex == kMinimumReads) { + timeConstant = Math.max(timeConstant, newTimeStamp - startTime); + } + double smoothFactor = + 1 - Math.exp(((double) deltaTime) / timeConstant); + double forecastVal = + smoothFactor * newSample + (1.0 - smoothFactor) * refRecord.forecast; + double newSSEError = + refRecord.sseError + Math.pow(newSample - refRecord.forecast, 2); + return refRecord + .createForecastRecord(smoothFactor, newSample, rData, forecastVal, + newTimeStamp, newSSEError, refRecord.myIndex + 1, refRecord); + } + } + + /** + * checks if the task is hanging up. + * + * @param timeStamp current time of the scan. + * @return true if we have number of samples > kMinimumReads and the record + * timestamp has expired. + */ + public boolean isDataStagnated(final long timeStamp) { + ForecastRecord rec = forecastRefEntry.get(); + if (rec != null && rec.myIndex > kMinimumReads) { + return (rec.timeStamp + kStagnatedWindow) > timeStamp; + } + return false; + } + + /** + * Process raw data double. + * + * @param oldRawData the old raw data + * @param oldTime the old time + * @param newRawData the new raw data + * @param newTime the new time + * @return the double + */ + static double processRawData(final double oldRawData, final long oldTime, + final double newRawData, final long newTime) { + double rate = (newRawData - oldRawData) / (newTime - oldTime); + return rate; + } + + /** + * Incorporate reading. + * + * @param timeStamp the time stamp + * @param currRawData the curr raw data + */ + public void incorporateReading(final long timeStamp, + final double currRawData) { + ForecastRecord oldRec = forecastRefEntry.get(); + if (oldRec == null) { + double oldForecast = + processRawData(0, startTime, currRawData, timeStamp); + forecastRefEntry.compareAndSet(null, + new ForecastRecord(oldForecast, 0.0d, startTime)); + incorporateReading(timeStamp, currRawData); + return; + } + while (!forecastRefEntry.compareAndSet(oldRec, oldRec.append(timeStamp, + currRawData))) { + oldRec = forecastRefEntry.get(); + } + } + + /** + * Gets forecast. + * + * @return the forecast + */ + public double getForecast() { + ForecastRecord rec = forecastRefEntry.get(); + if (rec != null && rec.myIndex > kMinimumReads) { + return rec.forecast; + } + return DEFAULT_FORECAST; + } + + /** + * Is default forecast boolean. + * + * @param value the value + * @return the boolean + */ + public boolean isDefaultForecast(final double value) { + return value == DEFAULT_FORECAST; + } + + /** + * Gets sse. + * + * @return the sse + */ + public double getSSE() { + ForecastRecord rec = forecastRefEntry.get(); + if (rec != null) { + return rec.sseError; + } + return DEFAULT_FORECAST; + } + + /** + * Is error within bound boolean. + * + * @param bound the bound + * @return the boolean + */ + public boolean isErrorWithinBound(final double bound) { + double squaredErr = getSSE(); + if (squaredErr < 0) { + return false; + } + return bound > squaredErr; + } + + /** + * Gets raw data. + * + * @return the raw data + */ + public double getRawData() { + ForecastRecord rec = forecastRefEntry.get(); + if (rec != null) { + return rec.rawData; + } + return DEFAULT_FORECAST; + } + + /** + * Gets time stamp. 
+ *
+ * @return the time stamp, or 0 if no reading has been incorporated yet
+ */
+ public long getTimeStamp() {
+ ForecastRecord rec = forecastRefEntry.get();
+ if (rec != null) {
+ return rec.timeStamp;
+ }
+ return 0L;
+ }
+
+ /**
+ * Gets the start time of the task attempt.
+ *
+ * @return the start time
+ */
+ public long getStartTime() {
+ return startTime;
+ }
+
+ /**
+ * Gets the reference to the current forecast record.
+ *
+ * @return the forecast ref entry
+ */
+ public AtomicReference<ForecastRecord> getForecastRefEntry() {
+ return forecastRefEntry;
+ }
+
+ @Override
+ public String toString() {
+ String res = "NULL";
+ ForecastRecord rec = forecastRefEntry.get();
+ if (rec != null) {
+ StringBuilder strB = new StringBuilder("rec.index = ").append(rec.myIndex)
+ .append(", timeStamp t: ").append(rec.timeStamp)
+ .append(", forecast: ").append(rec.forecast).append(", sample: ")
+ .append(rec.sample).append(", raw: ").append(rec.rawData)
+ .append(", error: ").append(rec.sseError).append(", alpha: ")
+ .append(rec.alpha);
+ res = strB.toString();
+ }
+ return res;
+ }
+}
+
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/forecast/package-info.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/forecast/package-info.java
new file mode 100644
index 0000000000..3ed8b6accb
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/speculation/legacy/forecast/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ +@InterfaceAudience.Private +package org.apache.tez.dag.app.dag.speculation.legacy.forecast; +import org.apache.hadoop.classification.InterfaceAudience; diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherManager.java index 58d87c9672..65360d6d01 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherManager.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherManager.java @@ -16,12 +16,14 @@ import java.net.UnknownHostException; import java.util.List; +import java.util.Set; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.tez.Utils; @@ -35,6 +37,8 @@ import org.apache.tez.dag.app.dag.event.DAGAppMasterEventType; import org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError; import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.serviceplugins.api.ContainerLaunchRequest; import org.apache.tez.serviceplugins.api.ContainerLauncher; import org.apache.tez.serviceplugins.api.ContainerLauncherContext; @@ -200,6 +204,18 @@ public void dagComplete(TezDAGID dag, JobTokenSecretManager secretManager) { } } + public void vertexComplete(TezVertexID vertex, JobTokenSecretManager secretManager, Set nodeIdList) { + for (int i = 0; i < containerLaunchers.length; i++) { + containerLaunchers[i].vertexComplete(vertex, secretManager, nodeIdList); + } + } + + public void taskAttemptFailed(TezTaskAttemptID taskAttemptID, JobTokenSecretManager secretManager, NodeId nodeId) { + for (int i = 0; i < containerLaunchers.length; i++) { + containerLaunchers[i].taskAttemptFailed(taskAttemptID, secretManager, nodeId); + } + } + public void dagSubmitted() { // Nothing to do right now. Indicates that a new DAG has been submitted and // the context has updated information. 
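
Review note, not part of the patch: SimpleExponentialSmoothing above implements the classic
update forecast' = alpha * sample + (1 - alpha) * forecast, with
alpha = 1 - e^(deltaTime / timeConstant) and deltaTime non-positive. A minimal usage sketch,
assuming a caller in the same package; the constants are illustrative only (the speculator
derives the real values from configuration):

  // 10s time constant, skip the first 2 readings, 30s stagnation window
  long start = System.currentTimeMillis();
  SimpleExponentialSmoothing forecaster =
      new SimpleExponentialSmoothing(10_000L, 2, 30_000L, start);
  // feed (timestamp, cumulative progress) readings as task status updates arrive
  forecaster.incorporateReading(start + 1_000, 0.05);
  forecaster.incorporateReading(start + 2_000, 0.12);
  forecaster.incorporateReading(start + 3_000, 0.21);
  double rate = forecaster.getForecast(); // smoothed progress per millisecond
  if (!forecaster.isDefaultForecast(rate)) {
    long remainingMs = (long) ((1.0 - 0.21) / rate); // rough remaining-time estimate
  }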
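
The vertexComplete and taskAttemptFailed notifications added above fan out to every registered
launcher, and ContainerLauncherWrapper (next hunk) forwards them only when the underlying plugin
is a DagContainerLauncher. A hypothetical plugin sketch; AuditingContainerLauncher is invented
for illustration, and it assumes DagContainerLauncher exposes a context-taking constructor and
these methods as overridable:

  // Hypothetical plugin that only audits the new cleanup notifications.
  public class AuditingContainerLauncher extends DagContainerLauncher {
    private static final org.slf4j.Logger LOG =
        org.slf4j.LoggerFactory.getLogger(AuditingContainerLauncher.class);

    public AuditingContainerLauncher(ContainerLauncherContext context) {
      super(context);
    }

    @Override
    public void launchContainer(ContainerLaunchRequest launchRequest) {
      // launching is out of scope for this sketch
    }

    @Override
    public void stopContainer(ContainerStopRequest stopRequest) {
      // stopping is out of scope for this sketch
    }

    @Override
    public void dagComplete(TezDAGID dag, JobTokenSecretManager jobTokenSecretManager) {
      LOG.info("DAG {} complete", dag);
    }

    @Override
    public void vertexComplete(TezVertexID vertex, JobTokenSecretManager jobTokenSecretManager,
        Set<NodeId> nodeIdList) {
      LOG.info("Vertex {} complete; shuffle data eligible for deletion on {}", vertex, nodeIdList);
    }

    @Override
    public void taskAttemptFailed(TezTaskAttemptID taskAttemptID,
        JobTokenSecretManager jobTokenSecretManager, NodeId nodeId) {
      LOG.info("Task attempt {} failed on {}", taskAttemptID, nodeId);
    }
  }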
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherWrapper.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherWrapper.java index 8ecac14856..4703abe100 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherWrapper.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerLauncherWrapper.java @@ -14,9 +14,14 @@ package org.apache.tez.dag.app.launcher; +import java.util.Set; + import org.apache.tez.common.DagContainerLauncher; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.tez.common.security.JobTokenSecretManager; import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.serviceplugins.api.ContainerLaunchRequest; import org.apache.tez.serviceplugins.api.ContainerLauncher; import org.apache.tez.serviceplugins.api.ContainerStopRequest; @@ -46,4 +51,17 @@ public void dagComplete(TezDAGID dag, JobTokenSecretManager jobTokenSecretManage ((DagContainerLauncher)real).dagComplete(dag, jobTokenSecretManager); } } + + public void vertexComplete(TezVertexID vertex, JobTokenSecretManager jobTokenSecretManager, Set nodeIdList) { + if (real instanceof DagContainerLauncher) { + ((DagContainerLauncher) real).vertexComplete(vertex, jobTokenSecretManager, nodeIdList); + } + } + + public void taskAttemptFailed(TezTaskAttemptID taskAttemptID, JobTokenSecretManager jobTokenSecretManager, + NodeId nodeId) { + if (real instanceof DagContainerLauncher) { + ((DagContainerLauncher) real).taskAttemptFailed(taskAttemptID, jobTokenSecretManager, nodeId); + } + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerOp.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerOp.java index c62de660ac..df4a9c0b1b 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerOp.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/ContainerOp.java @@ -14,7 +14,7 @@ package org.apache.tez.dag.app.launcher; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.tez.serviceplugins.api.ContainerLaunchRequest; import org.apache.tez.serviceplugins.api.ContainerLauncherOperationBase; diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DagDeleteRunnable.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DagDeleteRunnable.java index eac745e1ff..cbea36b86d 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DagDeleteRunnable.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DagDeleteRunnable.java @@ -54,7 +54,7 @@ public void run() { try { URL baseURL = TezRuntimeUtils.constructBaseURIForShuffleHandlerDagComplete( nodeId.getHost(), shufflePort, - dag.getApplicationId().toString(), dag.getId(), false); + dag.getApplicationId().toString(), dag.getId(), httpConnectionParams.isSslShuffle()); httpConnection = TezRuntimeUtils.getHttpConnection(true, baseURL, httpConnectionParams, "DAGDelete", jobTokenSecretManager); httpConnection.connect(); diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DeletionTracker.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DeletionTracker.java index 27ece70513..56760c86be 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DeletionTracker.java +++ 
b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DeletionTracker.java @@ -18,10 +18,14 @@ package org.apache.tez.dag.app.launcher; +import java.util.Set; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.tez.common.security.JobTokenSecretManager; import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.dag.records.TezVertexID; public abstract class DeletionTracker { @@ -35,6 +39,15 @@ public void dagComplete(TezDAGID dag, JobTokenSecretManager jobTokenSecretManage //do nothing } + public void vertexComplete(TezVertexID vertex, JobTokenSecretManager jobTokenSecretManager, Set nodeIdList) { + //do nothing + } + + public void taskAttemptFailed(TezTaskAttemptID taskAttemptID, JobTokenSecretManager jobTokenSecretManager, + NodeId nodeId) { + //do nothing + } + public void addNodeShufflePort(NodeId nodeId, int port) { //do nothing } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DeletionTrackerImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DeletionTrackerImpl.java index 06dae2d2b0..73eaa68e72 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DeletionTrackerImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/DeletionTrackerImpl.java @@ -21,6 +21,7 @@ import java.util.HashMap; import java.util.Map; +import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.RejectedExecutionException; @@ -34,6 +35,8 @@ import org.apache.tez.dag.records.TezDAGID; import org.apache.hadoop.conf.Configuration; import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.runtime.library.common.TezRuntimeUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -70,6 +73,26 @@ public void dagComplete(TezDAGID dag, JobTokenSecretManager jobTokenSecretManage } } + @Override + public void taskAttemptFailed(TezTaskAttemptID taskAttemptID, JobTokenSecretManager jobTokenSecretManager, + NodeId nodeId) { + super.taskAttemptFailed(taskAttemptID, jobTokenSecretManager, nodeId); + if (nodeIdShufflePortMap == null || nodeIdShufflePortMap.get(nodeId) == null) { + LOG.warn("Unable to find the shuffle port for shuffle data deletion of failed task attempt."); + return; + } + int shufflePort = nodeIdShufflePortMap.get(nodeId); + if (shufflePort != TezRuntimeUtils.INVALID_PORT) { + TaskAttemptFailedRunnable taskAttemptFailedRunnable = new TaskAttemptFailedRunnable(nodeId, shufflePort, + taskAttemptID, TezRuntimeUtils.getHttpConnectionParams(conf), jobTokenSecretManager); + try { + dagCleanupService.submit(taskAttemptFailedRunnable); + } catch (RejectedExecutionException rejectedException) { + LOG.info("Ignoring failed task attempt deletion request for " + taskAttemptFailedRunnable); + } + } + } + @Override public void addNodeShufflePort(NodeId nodeId, int port) { if (port != TezRuntimeUtils.INVALID_PORT) { @@ -92,4 +115,25 @@ public void shutdown() { } nodeIdShufflePortMap = null; } + + @Override + public void vertexComplete(TezVertexID vertex, JobTokenSecretManager jobTokenSecretManager, Set nodeIdList) { + super.vertexComplete(vertex, jobTokenSecretManager, nodeIdList); + String vertexId = String.format("%02d", vertex.getId()); + for (NodeId nodeId : nodeIdList) { + Integer shufflePort = null; + if (nodeIdShufflePortMap != null) 
{ + shufflePort = nodeIdShufflePortMap.get(nodeId); + } + if (shufflePort != null) { + VertexDeleteRunnable vertexDeleteRunnable = new VertexDeleteRunnable(vertex, jobTokenSecretManager, nodeId, + shufflePort, vertexId, TezRuntimeUtils.getHttpConnectionParams(conf)); + try { + dagCleanupService.submit(vertexDeleteRunnable); + } catch (RejectedExecutionException rejectedException) { + LOG.info("Ignoring deletion request for " + vertexDeleteRunnable); + } + } + } + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java index d50b49eb52..47cc9f1325 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/LocalContainerLauncher.java @@ -18,13 +18,13 @@ package org.apache.tez.dag.app.launcher; - import java.io.IOException; import java.net.InetAddress; import java.net.UnknownHostException; import java.nio.ByteBuffer; import java.util.HashMap; import java.util.Map; +import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.CancellationException; @@ -35,7 +35,7 @@ import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.atomic.AtomicBoolean; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Maps; import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; @@ -44,12 +44,14 @@ import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.tez.common.DagContainerLauncher; import org.apache.tez.common.ReflectionUtils; import org.apache.tez.common.TezUtils; import org.apache.tez.dag.records.TezDAGID; -import org.apache.tez.hadoop.shim.DefaultHadoopShim; import org.apache.tez.common.security.JobTokenSecretManager; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.runtime.library.common.TezRuntimeUtils; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; import org.apache.tez.serviceplugins.api.ContainerLaunchRequest; @@ -95,10 +97,16 @@ public class LocalContainerLauncher extends DagContainerLauncher { private final boolean isLocalMode; int shufflePort = TezRuntimeUtils.INVALID_PORT; private DeletionTracker deletionTracker; + private boolean dagDelete; + private boolean vertexDelete; + private boolean failedTaskAttemptDelete; - private final ConcurrentHashMap + private final ConcurrentHashMap> runningContainers = - new ConcurrentHashMap(); + new ConcurrentHashMap<>(); + + private final ConcurrentHashMap + cacheManagers = new ConcurrentHashMap<>(); private final ExecutorService callbackExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("CallbackExecutor").build()); @@ -154,10 +162,17 @@ public LocalContainerLauncher(ContainerLauncherContext containerLauncherContext, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("LocalTaskExecutionThread #%d") .build()); this.taskExecutorService = MoreExecutors.listeningDecorator(rawExecutor); - boolean cleanupDagDataOnComplete = ShuffleUtils.isTezShuffleHandler(conf) - && conf.getBoolean(TezConfiguration.TEZ_AM_DAG_CLEANUP_ON_COMPLETION, + dagDelete = 
ShuffleUtils.isTezShuffleHandler(conf) && + conf.getBoolean(TezConfiguration.TEZ_AM_DAG_CLEANUP_ON_COMPLETION, TezConfiguration.TEZ_AM_DAG_CLEANUP_ON_COMPLETION_DEFAULT); - if (cleanupDagDataOnComplete) { + vertexDelete = ShuffleUtils.isTezShuffleHandler(conf) && + conf.getInt(TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT, + TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT_DEFAULT) > 0; + failedTaskAttemptDelete = ShuffleUtils.isTezShuffleHandler(conf) && + conf.getBoolean(TezConfiguration.TEZ_AM_TASK_ATTEMPT_CLEANUP_ON_FAILURE, + TezConfiguration.TEZ_AM_TASK_ATTEMPT_CLEANUP_ON_FAILURE_DEFAULT); + + if (dagDelete || vertexDelete || failedTaskAttemptDelete) { String deletionTrackerClassName = conf.get(TezConfiguration.TEZ_AM_DELETION_TRACKER_CLASS, TezConfiguration.TEZ_AM_DELETION_TRACKER_CLASS_DEFAULT); deletionTracker = ReflectionUtils.createClazzInstance( @@ -229,6 +244,10 @@ void sendContainerLaunchFailedMsg(ContainerId containerId, String message) { private void handleLaunchFailed(Throwable t, ContainerId containerId) { String message; + + // clean up distributed cache files + cleanupCacheFiles(containerId); + if (t instanceof RejectedExecutionException) { message = "Failed to queue container launch for container Id: " + containerId; } else { @@ -244,10 +263,22 @@ private void launch(ContainerLaunchRequest event) { String tokenIdentifier = context.getApplicationID().toString(); try { TezChild tezChild; + try { int taskCommId = context.getTaskCommunicatorIdentifier(event.getTaskCommunicatorName()); + + Configuration conf = context.getAMConf(); + if (isLocalMode) { + TezLocalCacheManager cacheManager = new TezLocalCacheManager( + event.getContainerLaunchContext().getLocalResources(), + conf + ); + cacheManagers.put(event.getContainerId(), cacheManager); + cacheManager.localize(); + } + tezChild = - createTezChild(context.getAMConf(), event.getContainerId(), tokenIdentifier, + createTezChild(conf, event.getContainerId(), tokenIdentifier, context.getApplicationAttemptId().getAttemptId(), context.getLocalDirs(), ((TezTaskCommunicatorImpl)tal.getTaskCommunicator(taskCommId).getTaskCommunicator()).getUmbilical(), TezCommonUtils.parseCredentialsBytes(event.getContainerLaunchContext().getTokens().array())); @@ -264,7 +295,7 @@ private void launch(ContainerLaunchRequest event) { ListenableFuture runningTaskFuture = taskExecutorService.submit(createSubTask(tezChild, event.getContainerId())); RunningTaskCallback callback = new RunningTaskCallback(event.getContainerId()); - runningContainers.put(event.getContainerId(), callback); + runningContainers.put(event.getContainerId(), runningTaskFuture); Futures.addCallback(runningTaskFuture, callback, callbackExecutor); if (deletionTracker != null) { deletionTracker.addNodeShufflePort(event.getNodeId(), shufflePort); @@ -276,19 +307,17 @@ private void launch(ContainerLaunchRequest event) { private void stop(ContainerStopRequest event) { // A stop_request will come in when a task completes and reports back or a preemption decision - // is made. Currently the LocalTaskScheduler does not support preemption. Also preemption - // will not work in local mode till Tez supports task preemption instead of container preemption. - RunningTaskCallback callback = + // is made. + ListenableFuture future = runningContainers.get(event.getContainerId()); - if (callback == null) { + if (future == null) { LOG.info("Ignoring stop request for containerId: " + event.getContainerId()); } else { - LOG.info( - "Ignoring stop request for containerId {}. 
Relying on regular task shutdown for it to end", - event.getContainerId()); - // Allow the tezChild thread to run it's course. It'll receive a shutdown request from the - // AM eventually since the task and container will be unregistered. - // This will need to be fixed once interrupting tasks is supported. + LOG.info("Stopping containerId: {}, isDone: {}", event.getContainerId(), + future.isDone()); + future.cancel(false); + LOG.debug("Stopped containerId: {}, isCancelled: {}", event.getContainerId(), + future.isCancelled()); } // Send this event to maintain regular control flow. This isn't of much use though. getContext().containerStopRequested(event.getContainerId()); @@ -322,6 +351,9 @@ public void onSuccess(TezChild.ContainerExecutionResult result) { (result.getThrowable() == null ? null : result.getThrowable().getMessage()) : result.getErrorMessage(), TaskAttemptEndReason.APPLICATION_ERROR); } + + // clean up distributed cache files + cleanupCacheFiles(containerId); } @Override @@ -341,6 +373,22 @@ public void onFailure(Throwable t) { TezChild.ContainerExecutionResult.ExitStatus.SUCCESS.getExitCode(), "CancellationException", TaskAttemptEndReason.COMMUNICATION_ERROR.CONTAINER_EXITED); } + + // clean up distributed cache files + cleanupCacheFiles(containerId); + } + } + + private void cleanupCacheFiles(ContainerId container) { + if (isLocalMode) { + TezLocalCacheManager manager = cacheManagers.remove(container); + try { + if (manager != null) { + manager.cleanup(); + } + } catch (IOException e) { + LOG.info("Unable to clean up local cache files: ", e); + } } } @@ -407,9 +455,23 @@ public void stopContainer(ContainerStopRequest stopRequest) { @Override public void dagComplete(TezDAGID dag, JobTokenSecretManager jobTokenSecretManager) { - if (deletionTracker != null) { + if (dagDelete && deletionTracker != null) { deletionTracker.dagComplete(dag, jobTokenSecretManager); } } + @Override + public void vertexComplete(TezVertexID dag, JobTokenSecretManager jobTokenSecretManager, Set nodeIdList) { + if (vertexDelete && deletionTracker != null) { + deletionTracker.vertexComplete(dag, jobTokenSecretManager, nodeIdList); + } + } + + @Override + public void taskAttemptFailed(TezTaskAttemptID taskAttemptID, JobTokenSecretManager jobTokenSecretManager, + NodeId nodeId) { + if (failedTaskAttemptDelete && deletionTracker != null) { + deletionTracker.taskAttemptFailed(taskAttemptID, jobTokenSecretManager, nodeId); + } + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/TaskAttemptFailedDeleteRunnable.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/TaskAttemptFailedDeleteRunnable.java new file mode 100644 index 0000000000..3a9f5345a0 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/TaskAttemptFailedDeleteRunnable.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.app.launcher; + +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.tez.common.security.JobTokenSecretManager; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.http.BaseHttpConnection; +import org.apache.tez.http.HttpConnectionParams; +import org.apache.tez.runtime.library.common.TezRuntimeUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URL; + +class TaskAttemptFailedRunnable implements Runnable { + private static final Logger LOG = LoggerFactory.getLogger(TaskAttemptFailedRunnable.class); + private final NodeId nodeId; + private final TezTaskAttemptID taskAttemptID; + private final JobTokenSecretManager jobTokenSecretManager; + private final int shufflePort; + private final HttpConnectionParams httpConnectionParams; + + TaskAttemptFailedRunnable(NodeId nodeId, int shufflePort, TezTaskAttemptID taskAttemptID, + HttpConnectionParams httpConnectionParams, + JobTokenSecretManager jobTokenSecretMgr) { + this.nodeId = nodeId; + this.shufflePort = shufflePort; + this.taskAttemptID = taskAttemptID; + this.httpConnectionParams = httpConnectionParams; + this.jobTokenSecretManager = jobTokenSecretMgr; + } + + @Override + public void run() { + BaseHttpConnection httpConnection = null; + try { + URL baseURL = TezRuntimeUtils.constructBaseURIForShuffleHandlerTaskAttemptFailed( + nodeId.getHost(), shufflePort, taskAttemptID.getTaskID().getVertexID().getDAGID(). + getApplicationId().toString(), taskAttemptID.getTaskID().getVertexID().getDAGID().getId(), + taskAttemptID.toString(), httpConnectionParams.isSslShuffle()); + httpConnection = TezRuntimeUtils.getHttpConnection(true, baseURL, httpConnectionParams, + "FailedTaskAttemptDelete", jobTokenSecretManager); + httpConnection.connect(); + httpConnection.getInputStream(); + } catch (Exception e) { + LOG.warn("Could not setup HTTP Connection to the node " + nodeId.getHost() + + " for failed task attempt delete. ", e); + } finally { + try { + if (httpConnection != null) { + httpConnection.cleanup(true); + } + } catch (IOException ioe) { + LOG.warn("Encountered IOException for " + nodeId.getHost() + " during close. 
", ioe); + } + } + } + + @Override + public String toString() { + return "TaskAttemptFailedRunnable nodeId=" + nodeId + ", shufflePort=" + shufflePort + ", taskAttemptId=" + + taskAttemptID.toString(); + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/TezContainerLauncherImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/TezContainerLauncherImpl.java index 67fc4ed636..654224adb5 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/TezContainerLauncherImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/TezContainerLauncherImpl.java @@ -21,8 +21,8 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Collections; -import java.util.HashMap; import java.util.Map; +import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.LinkedBlockingQueue; @@ -34,6 +34,7 @@ import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.hadoop.io.DataInputByteBuffer; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.tez.common.DagContainerLauncher; import org.apache.tez.common.ReflectionUtils; import org.apache.tez.common.TezUtils; @@ -41,6 +42,8 @@ import org.apache.tez.dag.api.TezConstants; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.runtime.library.common.TezRuntimeUtils; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; import org.apache.tez.serviceplugins.api.ContainerLaunchRequest; @@ -90,6 +93,9 @@ public class TezContainerLauncherImpl extends DagContainerLauncher { private ContainerManagementProtocolProxy cmProxy; private AtomicBoolean serviceStopped = new AtomicBoolean(false); private DeletionTracker deletionTracker = null; + private boolean dagDelete; + private boolean vertexDelete; + private boolean failedTaskAttemptDelete; private Container getContainer(ContainerOp event) { ContainerId id = event.getBaseOperation().getContainerId(); @@ -332,10 +338,17 @@ public void run() { }; eventHandlingThread.setName("ContainerLauncher Event Handler"); eventHandlingThread.start(); - boolean cleanupDagDataOnComplete = ShuffleUtils.isTezShuffleHandler(conf) - && conf.getBoolean(TezConfiguration.TEZ_AM_DAG_CLEANUP_ON_COMPLETION, - TezConfiguration.TEZ_AM_DAG_CLEANUP_ON_COMPLETION_DEFAULT); - if (cleanupDagDataOnComplete) { + dagDelete = ShuffleUtils.isTezShuffleHandler(conf) && + conf.getBoolean(TezConfiguration.TEZ_AM_DAG_CLEANUP_ON_COMPLETION, + TezConfiguration.TEZ_AM_DAG_CLEANUP_ON_COMPLETION_DEFAULT); + vertexDelete = ShuffleUtils.isTezShuffleHandler(conf) && + conf.getInt(TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT, + TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT_DEFAULT) > 0; + failedTaskAttemptDelete = ShuffleUtils.isTezShuffleHandler(conf) && + conf.getBoolean(TezConfiguration.TEZ_AM_TASK_ATTEMPT_CLEANUP_ON_FAILURE, + TezConfiguration.TEZ_AM_TASK_ATTEMPT_CLEANUP_ON_FAILURE_DEFAULT); + + if (dagDelete || vertexDelete || failedTaskAttemptDelete) { String deletionTrackerClassName = conf.get(TezConfiguration.TEZ_AM_DELETION_TRACKER_CLASS, TezConfiguration.TEZ_AM_DELETION_TRACKER_CLASS_DEFAULT); deletionTracker = ReflectionUtils.createClazzInstance( @@ -385,9 +398,7 @@ public void run() { // Load ContainerManager tokens before creating a connection. // TODO: Do it only once per NodeManager. 
ContainerId containerID = event.getBaseOperation().getContainerId(); - if (LOG.isDebugEnabled()) { - LOG.debug("Processing ContainerOperation {}", event); - } + LOG.debug("Processing ContainerOperation {}", event); Container c = getContainer(event); switch(event.getOpType()) { @@ -443,9 +454,23 @@ public void stopContainer(ContainerStopRequest stopRequest) { @Override public void dagComplete(TezDAGID dag, JobTokenSecretManager jobTokenSecretManager) { - if (deletionTracker != null) { + if (dagDelete && deletionTracker != null) { deletionTracker.dagComplete(dag, jobTokenSecretManager); } } + @Override + public void vertexComplete(TezVertexID vertex, JobTokenSecretManager jobTokenSecretManager, Set nodeIdList) { + if (vertexDelete && deletionTracker != null) { + deletionTracker.vertexComplete(vertex, jobTokenSecretManager, nodeIdList); + } + } + + @Override + public void taskAttemptFailed(TezTaskAttemptID taskAttemptID, JobTokenSecretManager jobTokenSecretManager, + NodeId nodeId) { + if (failedTaskAttemptDelete && deletionTracker != null) { + deletionTracker.taskAttemptFailed(taskAttemptID, jobTokenSecretManager, nodeId); + } + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/TezLocalCacheManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/TezLocalCacheManager.java new file mode 100644 index 0000000000..f4892ab81e --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/TezLocalCacheManager.java @@ -0,0 +1,210 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.app.launcher; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.LocalResourceType; +import org.apache.hadoop.yarn.util.FSDownload; +import org.apache.tez.dag.api.TezConfiguration; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadFactory; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is responsible for localizing files from the distributed cache for Tez local mode. + */ +public class TezLocalCacheManager { + + private static final Logger LOG = LoggerFactory.getLogger(TezLocalCacheManager.class); + + final private Map resources; + final private Configuration conf; + final private UserGroupInformation ugi; + final private FileContext fileContext; + final private java.nio.file.Path tempDir; + + final private Map resourceInfo = new HashMap<>(); + + public TezLocalCacheManager(Map resources, Configuration conf) throws IOException { + this.ugi = UserGroupInformation.getCurrentUser(); + this.fileContext = FileContext.getLocalFSFileContext(); + this.resources = resources; + this.conf = conf; + this.tempDir = Files.createTempDirectory(getLocalCacheRoot(), "tez-local-cache"); + } + + /** + * Localize this instance's resources by downloading and symlinking them. + * + * @throws IOException when an error occurs in download or link + */ + public void localize() throws IOException { + String absPath = getLocalCacheRoot().toAbsolutePath().normalize().toString(); + Path cwd = fileContext.makeQualified(new Path(absPath)); + ExecutorService threadPool = null; + + try { + // construct new threads with helpful names + ThreadFactory threadFactory = new ThreadFactoryBuilder() + .setNameFormat("TezLocalCacheManager Downloader #%d") + .build(); + threadPool = Executors.newCachedThreadPool(threadFactory); + + // start all fetches + for (Map.Entry entry : resources.entrySet()) { + String resourceName = entry.getKey(); + LocalResource resource = entry.getValue(); + + if (resource.getType() == LocalResourceType.PATTERN) { + throw new IllegalArgumentException("Resource type PATTERN not supported."); + } + + // linkPath is the path we want to symlink the file/directory into + Path linkPath = new Path(cwd, entry.getKey()); + + if (resourceInfo.containsKey(resource)) { + // We've already downloaded this resource and just need to add another link. + resourceInfo.get(resource).getLinkPaths().add(linkPath); + } else { + // submit task to download the object + java.nio.file.Path fp = Paths.get(resourceName).getFileName(); + String prefix = fp == null ? 
"" : fp.toString(); // The null case is unexpected, but FindBugs complains + java.nio.file.Path downloadDir = Files.createTempDirectory(tempDir, prefix); + Path dest = new Path(downloadDir.toAbsolutePath().toString()); + FSDownload downloader = new FSDownload(fileContext, ugi, conf, dest, resource); + Future downloadedPath = threadPool.submit(downloader); + resourceInfo.put(resource, new ResourceInfo(downloadedPath, linkPath)); + } + } + + // Link each file + for (Map.Entry entry : resourceInfo.entrySet()) { + LocalResource resource = entry.getKey(); + ResourceInfo resourceMeta = entry.getValue(); + + for (Path linkPath : resourceMeta.getLinkPaths()) { + Path targetPath; + + try { + // this blocks on the download completing + targetPath = resourceMeta.getDownloadPath().get(); + } catch (InterruptedException | ExecutionException e) { + throw new IOException(e); + } + + if (createSymlink(targetPath, linkPath)) { + LOG.info("Localized file: {} as {}", resource, linkPath); + } else { + LOG.warn("Failed to create symlink: {} <- {}", targetPath, linkPath); + } + } + } + } finally { + if (threadPool != null) { + threadPool.shutdownNow(); + } + } + } + + /** + * Clean up any symlinks and temp files that were created. + * + * @throws IOException when an error occurs in cleanup + */ + public void cleanup() throws IOException { + for (ResourceInfo info : resourceInfo.values()) { + for (Path linkPath : info.getLinkPaths()) { + if (fileContext.util().exists(linkPath)) { + fileContext.delete(linkPath, true); + } + } + } + + Path temp = new Path(tempDir.toString()); + if (fileContext.util().exists(temp)) { + fileContext.delete(temp, true); + } + } + + /** + * Create a symlink. + */ + private boolean createSymlink(Path target, Path link) throws IOException { + LOG.info("Creating symlink: {} <- {}", target, link); + String targetPath = target.toUri().getPath(); + String linkPath = link.toUri().getPath(); + + if (fileContext.util().exists(link)) { + LOG.warn("File already exists at symlink path: {}", link); + return false; + } else { + try { + Files.createSymbolicLink(Paths.get(linkPath), Paths.get(targetPath)); + return true; + } catch (UnsupportedOperationException | IOException e) { + LOG.warn("Unable to create symlink {} <- {}: {}", target, link, e); + return false; + } + } + } + + private java.nio.file.Path getLocalCacheRoot() { + return Paths.get(conf.get(TezConfiguration.TEZ_LOCAL_CACHE_ROOT_FOLDER, + TezConfiguration.TEZ_LOCAL_CACHE_ROOT_FOLDER_DEFAULT)); + } + + /** + * Wrapper to keep track of download path and link path. + */ + private static class ResourceInfo { + private final Future downloadPath; + private final Set linkPaths = new HashSet<>(); + + ResourceInfo(Future downloadPath, Path linkPath) { + this.downloadPath = downloadPath; + this.getLinkPaths().add(linkPath); + } + + Future getDownloadPath() { + return downloadPath; + } + + Set getLinkPaths() { + return linkPaths; + } + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/VertexDeleteRunnable.java b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/VertexDeleteRunnable.java new file mode 100644 index 0000000000..3bfec9663a --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/launcher/VertexDeleteRunnable.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.app.launcher;
+
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.tez.common.security.JobTokenSecretManager;
+import org.apache.tez.dag.records.TezVertexID;
+import org.apache.tez.http.BaseHttpConnection;
+import org.apache.tez.http.HttpConnectionParams;
+import org.apache.tez.runtime.library.common.TezRuntimeUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.URL;
+
+public class VertexDeleteRunnable implements Runnable {
+ private static final Logger LOG = LoggerFactory.getLogger(VertexDeleteRunnable.class);
+ final private TezVertexID vertex;
+ final private JobTokenSecretManager jobTokenSecretManager;
+ final private NodeId nodeId;
+ final private int shufflePort;
+ final private String vertexId;
+ final private HttpConnectionParams httpConnectionParams;
+
+ VertexDeleteRunnable(TezVertexID vertex, JobTokenSecretManager jobTokenSecretManager,
+ NodeId nodeId, int shufflePort, String vertexId,
+ HttpConnectionParams httpConnectionParams) {
+ this.vertex = vertex;
+ this.jobTokenSecretManager = jobTokenSecretManager;
+ this.nodeId = nodeId;
+ this.shufflePort = shufflePort;
+ this.vertexId = vertexId;
+ this.httpConnectionParams = httpConnectionParams;
+ }
+
+ @Override
+ public void run() {
+ BaseHttpConnection httpConnection = null;
+ try {
+ URL baseURL = TezRuntimeUtils.constructBaseURIForShuffleHandlerVertexComplete(
+ nodeId.getHost(), shufflePort,
+ vertex.getDAGID().getApplicationId().toString(), vertex.getDAGID().getId(), vertexId,
+ httpConnectionParams.isSslShuffle());
+ httpConnection = TezRuntimeUtils.getHttpConnection(true, baseURL, httpConnectionParams,
+ "VertexDelete", jobTokenSecretManager);
+ httpConnection.connect();
+ httpConnection.getInputStream();
+ } catch (Exception e) {
+ LOG.warn("Could not setup HTTP Connection to the node " + nodeId.getHost() +
+ " for vertex shuffle delete. ", e);
+ } finally {
+ try {
+ if (httpConnection != null) {
+ httpConnection.cleanup(true);
+ }
+ } catch (IOException e) {
+ LOG.warn("Encountered IOException for " + nodeId.getHost() + " during close. 
", e); + } + } + } + + @Override + public String toString() { + return "VertexDeleteRunnable nodeId=" + nodeId + ", shufflePort=" + shufflePort + ", vertexId=" + vertexId; + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java index f7fee3acfe..107fbf62c7 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/AMSchedulerEventTAEnded.java @@ -42,7 +42,7 @@ public AMSchedulerEventTAEnded(TaskAttempt attempt, ContainerId containerId, } public TezTaskAttemptID getAttemptID() { - return this.attempt.getID(); + return this.attempt.getTaskAttemptID(); } public TaskAttempt getAttempt() { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/DagAwareYarnTaskScheduler.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/DagAwareYarnTaskScheduler.java new file mode 100644 index 0000000000..131302a03a --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/DagAwareYarnTaskScheduler.java @@ -0,0 +1,2106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tez.dag.app.rm; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.tez.common.Preconditions; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.commons.lang.mutable.MutableInt; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Time; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.NodeReport; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.client.api.AMRMClient; +import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync; +import org.apache.hadoop.yarn.client.api.async.impl.AMRMClientAsyncImpl; +import org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; +import org.apache.hadoop.yarn.util.RackResolver; +import org.apache.hadoop.yarn.util.resource.Resources; +import org.apache.tez.common.ContainerSignatureMatcher; +import org.apache.tez.common.TezUtils; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.app.dag.TaskAttempt; +import org.apache.tez.serviceplugins.api.DagInfo; +import org.apache.tez.serviceplugins.api.TaskAttemptEndReason; +import org.apache.tez.serviceplugins.api.TaskScheduler; +import org.apache.tez.serviceplugins.api.TaskSchedulerContext; +import org.apache.tez.serviceplugins.api.TaskSchedulerContext.AMState; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; +import javax.annotation.concurrent.GuardedBy; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.NavigableMap; +import java.util.PriorityQueue; +import java.util.Set; +import java.util.TreeMap; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; + +/** + * A YARN task scheduler that is aware of the dependencies between vertices + * in the DAG and takes them into account when deciding how to schedule + * and preempt tasks. + * + * This scheduler makes the assumption that vertex IDs start at 0 and are + * densely allocated (i.e.: there are no "gaps" in the vertex ID space). 
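+ *
+ * Because vertex IDs are dense, per-vertex scheduling state can be tracked
+ * with BitSets indexed by vertex ID (see assignedVertices and
+ * vertexDescendants below) rather than with hash-based collections.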
+ */ +public class DagAwareYarnTaskScheduler extends TaskScheduler + implements AMRMClientAsync.CallbackHandler { + private static final Logger LOG = LoggerFactory.getLogger(DagAwareYarnTaskScheduler.class); + private static final Comparator PREEMPT_ORDER_COMPARATOR = new PreemptOrderComparator(); + + private AMRMClientAsyncWrapper client; + private ScheduledExecutorService reuseExecutor; + private ResourceCalculator resourceCalculator; + private int numHeartbeats = 0; + private Resource totalResources = Resource.newInstance(0, 0); + @GuardedBy("this") + private Resource allocatedResources = Resource.newInstance(0, 0); + private final Set blacklistedNodes = Collections.newSetFromMap(new ConcurrentHashMap()); + private final ContainerSignatureMatcher signatureMatcher; + @GuardedBy("this") + private final RequestTracker requestTracker = new RequestTracker(); + @GuardedBy("this") + private final Map heldContainers = new HashMap<>(); + @GuardedBy("this") + private final IdleContainerTracker idleTracker = new IdleContainerTracker(); + @GuardedBy("this") + private final Map taskAssignments = new HashMap<>(); + + /** A mapping from the vertex ID to the set of containers assigned to tasks for that vertex */ + @GuardedBy("this") + private final Map> vertexAssignments = new HashMap<>(); + + /** If vertex N has at least one task assigned to a container then the corresponding bit at index N is set */ + @GuardedBy("this") + private final BitSet assignedVertices = new BitSet(); + + /** + * Tracks assigned tasks for released containers so the app can be notified properly when the + * container completion event finally arrives. + */ + @GuardedBy("this") + private final Map releasedContainers = new HashMap<>(); + + @GuardedBy("this") + private final Set sessionContainers = new HashSet<>(); + + /** + * Tracks the set of descendant vertices in the DAG for each vertex. The BitSet for descendants of vertex N + * are at array index N. If a bit is set at index X in the descendants BitSet then vertex X is a descendant + * of vertex N in the DAG. 
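+ * For example, in a DAG with edges 0 -> 1 and 1 -> 2, the BitSet at index 0
+ * would have bits 1 and 2 set, while the BitSet at index 2 would be empty.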
+ */ + @GuardedBy("this") + private ArrayList vertexDescendants = null; + + private volatile boolean stopRequested = false; + private volatile boolean shouldUnregister = false; + private volatile boolean hasUnregistered = false; + + // cached configuration parameters + private boolean shouldReuseContainers; + private boolean reuseRackLocal; + private boolean reuseNonLocal; + private boolean reuseNewContainers; + private long localitySchedulingDelay; + private long idleContainerTimeoutMin; + private long idleContainerTimeoutMax; + private int sessionNumMinHeldContainers; + private int preemptionPercentage; + private int numHeartbeatsBetweenPreemptions; + private int lastPreemptionHeartbeat = 0; + private long preemptionMaxWaitTime; + + public DagAwareYarnTaskScheduler(TaskSchedulerContext taskSchedulerContext) { + super(taskSchedulerContext); + signatureMatcher = taskSchedulerContext.getContainerSignatureMatcher(); + } + + @Override + public void initialize() throws Exception { + initialize(new AMRMClientAsyncWrapper(new AMRMClientImpl(), 1000, this)); + } + + void initialize(AMRMClientAsyncWrapper client) throws Exception { + super.initialize(); + this.client = client; + Configuration conf = TezUtils.createConfFromUserPayload(getContext().getInitialUserPayload()); + client.init(conf); + + int heartbeatIntervalMax = conf.getInt( + TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, + TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX_DEFAULT); + client.setHeartbeatInterval(heartbeatIntervalMax); + + shouldReuseContainers = conf.getBoolean( + TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, + TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED_DEFAULT); + reuseRackLocal = conf.getBoolean( + TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, + TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED_DEFAULT); + reuseNonLocal = conf + .getBoolean( + TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, + TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED_DEFAULT); + Preconditions.checkArgument( + ((!reuseRackLocal && !reuseNonLocal) || (reuseRackLocal)), + "Re-use Rack-Local cannot be disabled if Re-use Non-Local has been" + + " enabled"); + + reuseNewContainers = shouldReuseContainers && conf.getBoolean( + TezConfiguration.TEZ_AM_CONTAINER_REUSE_NEW_CONTAINERS_ENABLED, + TezConfiguration.TEZ_AM_CONTAINER_REUSE_NEW_CONTAINERS_ENABLED_DEFAULT); + + localitySchedulingDelay = conf.getLong( + TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, + TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS_DEFAULT); + Preconditions.checkArgument(localitySchedulingDelay >= 0, + "Locality Scheduling delay should be >=0"); + + idleContainerTimeoutMin = conf.getLong( + TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, + TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS_DEFAULT); + Preconditions.checkArgument(idleContainerTimeoutMin >= 0 || idleContainerTimeoutMin == -1, + "Idle container release min timeout should be either -1 or >=0"); + + idleContainerTimeoutMax = conf.getLong( + TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, + TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS_DEFAULT); + Preconditions.checkArgument( + idleContainerTimeoutMax >= 0 && idleContainerTimeoutMax >= idleContainerTimeoutMin, + "Idle container release max timeout should be >=0 and >= " + + TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS); + + 
sessionNumMinHeldContainers = conf.getInt(TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS, + TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS_DEFAULT); + Preconditions.checkArgument(sessionNumMinHeldContainers >= 0, + "Session minimum held containers should be >=0"); + + preemptionPercentage = conf.getInt(TezConfiguration.TEZ_AM_PREEMPTION_PERCENTAGE, + TezConfiguration.TEZ_AM_PREEMPTION_PERCENTAGE_DEFAULT); + Preconditions.checkArgument(preemptionPercentage >= 0 && preemptionPercentage <= 100, + "Preemption percentage should be between 0-100"); + + numHeartbeatsBetweenPreemptions = conf.getInt( + TezConfiguration.TEZ_AM_PREEMPTION_HEARTBEATS_BETWEEN_PREEMPTIONS, + TezConfiguration.TEZ_AM_PREEMPTION_HEARTBEATS_BETWEEN_PREEMPTIONS_DEFAULT); + Preconditions.checkArgument(numHeartbeatsBetweenPreemptions >= 1, + "Heartbeats between preemptions should be >=1"); + + preemptionMaxWaitTime = conf.getInt(TezConfiguration.TEZ_AM_PREEMPTION_MAX_WAIT_TIME_MS, + TezConfiguration.TEZ_AM_PREEMPTION_MAX_WAIT_TIME_MS_DEFAULT); + Preconditions.checkArgument(preemptionMaxWaitTime >=0, "Preemption max wait time must be >=0"); + + LOG.info("scheduler initialized with maxRMHeartbeatInterval:" + heartbeatIntervalMax + + " reuseEnabled:" + shouldReuseContainers + + " reuseRack:" + reuseRackLocal + + " reuseAny:" + reuseNonLocal + + " localityDelay:" + localitySchedulingDelay + + " preemptPercentage:" + preemptionPercentage + + " preemptMaxWaitTime:" + preemptionMaxWaitTime + + " numHeartbeatsBetweenPreemptions:" + numHeartbeatsBetweenPreemptions + + " idleContainerMinTimeout:" + idleContainerTimeoutMin + + " idleContainerMaxTimeout:" + idleContainerTimeoutMax + + " sessionMinHeldContainers:" + sessionNumMinHeldContainers); + } + + @Override + public void start() throws Exception { + super.start(); + client.start(); + if (shouldReuseContainers) { + reuseExecutor = createExecutor(); + } + TaskSchedulerContext ctx = getContext(); + RegisterApplicationMasterResponse response = client.registerApplicationMaster( + ctx.getAppHostName(), ctx.getAppClientPort(), ctx.getAppTrackingUrl()); + ctx.setApplicationRegistrationData(response.getMaximumResourceCapability(), + response.getApplicationACLs(), response.getClientToAMTokenMasterKey(), + response.getQueue()); + if (response.getSchedulerResourceTypes().contains(SchedulerResourceTypes.CPU)) { + resourceCalculator = new MemCpuResourceCalculator(); + } else { + resourceCalculator = new MemResourceCalculator(); + } + } + + protected ScheduledExecutorService createExecutor() { + return new ReuseContainerExecutor(); + } + + protected long now() { + return Time.monotonicNow(); + } + + @Override + public void initiateStop() { + super.initiateStop(); + LOG.debug("Initiating stop of task scheduler"); + stopRequested = true; + List releasedLaunchedContainers; + synchronized (this) { + releasedLaunchedContainers = new ArrayList<>(heldContainers.size()); + List heldList = new ArrayList<>(heldContainers.values()); + for (HeldContainer hc : heldList) { + if (releaseContainer(hc)) { + releasedLaunchedContainers.add(hc.getId()); + } + } + + List tasks = requestTracker.getTasks(); + for (Object task : tasks) { + removeTaskRequest(task); + } + } + + // perform app callback outside of locks + for (ContainerId id : releasedLaunchedContainers) { + getContext().containerBeingReleased(id); + } + } + + @Override + public void shutdown() throws Exception { + super.shutdown(); + if (reuseExecutor != null) { + reuseExecutor.shutdown(); + reuseExecutor.awaitTermination(2, TimeUnit.SECONDS); + } 
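+ // Unregister from the RM at most once, and only when unregistration has
+ // been requested via shouldUnregister.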
+ synchronized (this) { + if (shouldUnregister && !hasUnregistered) { + TaskSchedulerContext.AppFinalStatus status = getContext().getFinalAppStatus(); + LOG.info("Unregistering from RM, exitStatus={} exitMessage={} trackingURL={}", + status.exitStatus, status.exitMessage, status.postCompletionTrackingUrl); + client.unregisterApplicationMaster(status.exitStatus, + status.exitMessage, + status.postCompletionTrackingUrl); + hasUnregistered = true; + } + } + client.stop(); + } + + @Override + public void onContainersAllocated(List containers) { + super.onContainersAllocated(containers); + + AMState appState = getContext().getAMState(); + if (stopRequested || appState == AMState.COMPLETED) { + LOG.info("Ignoring {} allocations since app is terminating", containers.size()); + for (Container c : containers) { + client.releaseAssignedContainer(c.getId()); + } + return; + } + List assignments = assignNewContainers(containers, getContext().getAMState(), getContext().isSession()); + informAppAboutAssignments(assignments); + } + + private synchronized List assignNewContainers(List newContainers, + AMState appState, boolean isSession) { + // try to assign the containers as node-local + List assignments = new ArrayList<>(newContainers.size()); + List unassigned = new ArrayList<>(newContainers.size()); + for (Container c : newContainers) { + HeldContainer hc = new HeldContainer(c); + heldContainers.put(hc.getId(), hc); + Resources.addTo(allocatedResources, c.getResource()); + tryAssignNewContainer(hc, hc.getHost(), assignments, unassigned); + } + + // try to assign the remaining containers as rack-local + List containers = unassigned; + unassigned = new ArrayList<>(containers.size()); + for (HeldContainer hc : containers) { + tryAssignNewContainer(hc, hc.getRack(), assignments, unassigned); + } + + // try to assign the remaining containers without locality + containers = unassigned; + unassigned = new ArrayList<>(containers.size()); + for (HeldContainer hc : containers) { + tryAssignNewContainer(hc, ResourceRequest.ANY, assignments, unassigned); + } + + for (HeldContainer hc : unassigned) { + if (reuseNewContainers) { + idleTracker.add(hc); + TaskRequest assigned = tryAssignReuseContainer(hc, appState, isSession); + if (assigned != null) { + assignments.add(new Assignment(assigned, hc.getContainer())); + } + } else { + releaseContainer(hc); + } + } + + return assignments; + } + + /** + * Try to assign a newly acquired container to a task of the same priority. 
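+ * Callers retry containers that remain unassigned at progressively relaxed
+ * locality levels (node, then rack, then ANY).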
+ * + * @param hc the container to assign + * @param location the locality to consider for assignment + * @param assignments list to update if container is assigned + * @param unassigned list to update if container is not assigned + */ + @GuardedBy("this") + private void tryAssignNewContainer(HeldContainer hc, String location, + List assignments, List unassigned) { + List> results = client.getMatchingRequests(hc.getPriority(), + location, hc.getCapability()); + if (!results.isEmpty()) { + for (Collection requests : results) { + if (!requests.isEmpty()) { + TaskRequest request = requests.iterator().next(); + if (maybeChangeNode(request, hc.getContainer().getNodeId())) { + continue; + } + assignContainer(request, hc, location); + assignments.add(new Assignment(request, hc.getContainer())); + return; + } + } + } + + unassigned.add(hc); + } + + @GuardedBy("this") + @Nullable + private TaskRequest tryAssignReuseContainer(HeldContainer hc, + AMState appState, boolean isSession) { + if (stopRequested) { + return null; + } + + TaskRequest assignedRequest = null; + switch (appState) { + case IDLE: + handleReuseContainerWhenIdle(hc, isSession); + break; + case RUNNING_APP: + if (requestTracker.isEmpty()) { + // treat no requests as if app is idle + handleReuseContainerWhenIdle(hc, isSession); + } else { + assignedRequest = tryAssignReuseContainerAppRunning(hc); + if (assignedRequest == null) { + if (hc.atMaxMatchLevel()) { + LOG.info("Releasing idle container {} due to pending requests", hc.getId()); + releaseContainer(hc); + } else { + hc.scheduleForReuse(localitySchedulingDelay); + } + } + } + break; + case COMPLETED: + LOG.info("Releasing container {} because app has completed", hc.getId()); + releaseContainer(hc); + break; + default: + throw new IllegalStateException("Unexpected app state " + appState); + } + + return assignedRequest; + } + + @GuardedBy("this") + private void handleReuseContainerWhenIdle(HeldContainer hc, boolean isSession) { + if (isSession && sessionContainers.isEmpty() && sessionNumMinHeldContainers > 0) { + computeSessionContainers(); + } + + if (sessionContainers.contains(hc)) { + LOG.info("Retaining container {} since it is a session container", hc); + hc.resetMatchingLevel(); + } else { + long now = now(); + long expiration = hc.getIdleExpirationTimestamp(now); + if (now >= expiration) { + LOG.info("Releasing expired idle container {}", hc.getId()); + releaseContainer(hc); + } else { + hc.scheduleForReuse(expiration - now); + } + } + } + + @GuardedBy("this") + @Nullable + private TaskRequest tryAssignReuseContainerAppRunning(HeldContainer hc) { + if (!hc.isAssignable()) { + LOG.debug("Skipping scheduling of container {} because it state is {}", hc.getId(), hc.getState()); + return null; + } + + TaskRequest assignedRequest = tryAssignReuseContainerForAffinity(hc); + if (assignedRequest != null) { + return assignedRequest; + } + + for (Entry entry : requestTracker.getStatsEntries()) { + Priority priority = entry.getKey(); + RequestPriorityStats stats = entry.getValue(); + if (!stats.allowedVertices.intersects(stats.vertices)) { + LOG.debug("Skipping requests at priority {} because all requesting vertices are blocked by higher priority requests", + priority); + continue; + } + + String matchLocation = hc.getMatchingLocation(); + if (stats.localityCount <= 0) { + LOG.debug("Overriding locality match of container {} to ANY since there are no locality requests at priority {}", + hc.getId(), priority); + matchLocation = ResourceRequest.ANY; + } + assignedRequest = 
+  @GuardedBy("this")
+  @Nullable
+  private TaskRequest tryAssignReuseContainerAppRunning(HeldContainer hc) {
+    if (!hc.isAssignable()) {
+      LOG.debug("Skipping scheduling of container {} because its state is {}", hc.getId(), hc.getState());
+      return null;
+    }
+
+    TaskRequest assignedRequest = tryAssignReuseContainerForAffinity(hc);
+    if (assignedRequest != null) {
+      return assignedRequest;
+    }
+
+    for (Entry<Priority, RequestPriorityStats> entry : requestTracker.getStatsEntries()) {
+      Priority priority = entry.getKey();
+      RequestPriorityStats stats = entry.getValue();
+      if (!stats.allowedVertices.intersects(stats.vertices)) {
+        LOG.debug("Skipping requests at priority {} because all requesting vertices are blocked by higher priority requests",
+            priority);
+        continue;
+      }
+
+      String matchLocation = hc.getMatchingLocation();
+      if (stats.localityCount <= 0) {
+        LOG.debug("Overriding locality match of container {} to ANY since there are no locality requests at priority {}",
+            hc.getId(), priority);
+        matchLocation = ResourceRequest.ANY;
+      }
+      assignedRequest = tryAssignReuseContainerForPriority(hc, matchLocation,
+          priority, stats.allowedVertices);
+      if (assignedRequest != null) {
+        break;
+      }
+    }
+    return assignedRequest;
+  }
+
+  @GuardedBy("this")
+  @Nullable
+  private TaskRequest tryAssignReuseContainerForAffinity(HeldContainer hc) {
+    Collection<TaskRequest> affinities = hc.getAffinities();
+    if (affinities != null) {
+      for (TaskRequest request : affinities) {
+        if (requestTracker.isRequestBlocked(request)) {
+          LOG.debug("Cannot assign task {} to container {} since vertex {} is a descendant of pending tasks",
+              request.getTask(), hc.getId(), request.getVertexIndex());
+        } else if (maybeChangeNode(request, hc.getContainer().getNodeId())) {
+          LOG.debug("Cannot assign task {} to container {} since node {} is running sibling attempts",
+              request.getTask(), hc.getId(), hc.getContainer().getNodeId());
+        } else {
+          assignContainer(request, hc, hc.getId());
+          return request;
+        }
+      }
+    }
+    return null;
+  }
+
+  @GuardedBy("this")
+  @Nullable
+  private TaskRequest tryAssignReuseContainerForPriority(HeldContainer hc, String matchLocation,
+      Priority priority, BitSet allowedVertices) {
+    List<? extends Collection<TaskRequest>> results = client.getMatchingRequests(priority, matchLocation, hc.getCapability());
+    if (results.isEmpty()) {
+      return null;
+    }
+
+    for (Collection<TaskRequest> requests : results) {
+      for (TaskRequest request : requests) {
+        final int vertexIndex = request.getVertexIndex();
+        if (!allowedVertices.get(vertexIndex)) {
+          LOG.debug("Not assigning task {} since it is a descendant of a pending vertex", request.getTask());
+          continue;
+        }
+
+        Object signature = hc.getSignature();
+        if (signature == null || signatureMatcher.isSuperSet(signature, request.getContainerSignature())) {
+          if (!maybeChangeNode(request, hc.getContainer().getNodeId())) {
+            assignContainer(request, hc, matchLocation);
+            return request;
+          }
+        }
+      }
+    }
+    return null;
+  }
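+  // Illustration (hypothetical DAG): if vertex B (index 1) descends from
+  // vertex A (index 0) and A still has requests pending at a higher priority,
+  // bit 1 is cleared in allowedVertices, so B's requests are skipped above
+  // even when this container matches them; reuse never starves upstream work.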
+  private void informAppAboutAssignments(List<Assignment> assignments) {
+    if (!assignments.isEmpty()) {
+      for (Assignment a : assignments) {
+        informAppAboutAssignment(a.request, a.container);
+      }
+    }
+  }
+
+  /**
+   * Inform the app about a task assignment. This should not be called with
+   * any locks held.
+   *
+   * @param request the corresponding task request
+   * @param container the container assigned to the task
+   */
+  private void informAppAboutAssignment(TaskRequest request, Container container) {
+    if (blacklistedNodes.contains(container.getNodeId())) {
+      Object task = request.getTask();
+      LOG.info("Container {} allocated for task {} on blacklisted node {}",
+          container.getId(), container.getNodeId(), task);
+      deallocateContainer(container.getId());
+      // it's OK to submit the same request again because the RM will not give us
+      // the bad/unhealthy nodes again. The nodes may become healthy/unblacklisted
+      // and so it's better to give the RM the full information.
+      allocateTask(task, request.getCapability(),
+          (request.getNodes() == null ? null :
+              request.getNodes().toArray(new String[request.getNodes().size()])),
+          (request.getRacks() == null ? null :
+              request.getRacks().toArray(new String[request.getRacks().size()])),
+          request.getPriority(),
+          request.getContainerSignature(),
+          request.getCookie());
+    } else {
+      getContext().taskAllocated(request.getTask(), request.getCookie(), container);
+    }
+  }
+
+  @GuardedBy("this")
+  private void computeSessionContainers() {
+    Map<String, MutableInt> rackHeldNumber = new HashMap<>();
+    Map<String, List<HeldContainer>> nodeHeldContainers = new HashMap<>();
+    for (HeldContainer heldContainer : heldContainers.values()) {
+      if (heldContainer.getSignature() == null) {
+        // skip containers that have not been launched as there is no process to reuse
+        continue;
+      }
+      MutableInt count = rackHeldNumber.get(heldContainer.getRack());
+      if (count == null) {
+        count = new MutableInt(0);
+        rackHeldNumber.put(heldContainer.getRack(), count);
+      }
+      count.increment();
+      String host = heldContainer.getHost();
+      List<HeldContainer> nodeContainers = nodeHeldContainers.get(host);
+      if (nodeContainers == null) {
+        nodeContainers = new LinkedList<>();
+        nodeHeldContainers.put(host, nodeContainers);
+      }
+      nodeContainers.add(heldContainer);
+    }
+
+    Map<String, MutableInt> rackToHoldNumber = new HashMap<>();
+    for (String rack : rackHeldNumber.keySet()) {
+      rackToHoldNumber.put(rack, new MutableInt(0));
+    }
+
+    // distribute evenly across racks
+    // the loop assigns 1 container per rack over all racks
+    int containerCount = 0;
+    while (containerCount < sessionNumMinHeldContainers && !rackHeldNumber.isEmpty()) {
+      Iterator<Entry<String, MutableInt>> iter = rackHeldNumber.entrySet().iterator();
+      while (containerCount < sessionNumMinHeldContainers && iter.hasNext()) {
+        Entry<String, MutableInt> entry = iter.next();
+        MutableInt rackCount = entry.getValue();
+        rackCount.decrement();
+        if (rackCount.intValue() >= 0) {
+          containerCount++;
+          rackToHoldNumber.get(entry.getKey()).increment();
+        } else {
+          iter.remove();
+        }
+      }
+    }
+
+    // distribute containers evenly across nodes while not exceeding rack limit
+    // the loop assigns 1 container per node over all nodes
+    containerCount = 0;
+    while (containerCount < sessionNumMinHeldContainers && !nodeHeldContainers.isEmpty()) {
+      Iterator<Entry<String, List<HeldContainer>>> iter = nodeHeldContainers.entrySet().iterator();
+      while (containerCount < sessionNumMinHeldContainers && iter.hasNext()) {
+        List<HeldContainer> nodeContainers = iter.next().getValue();
+        if (nodeContainers.isEmpty()) {
+          // node is empty. remove it.
+          iter.remove();
+          continue;
+        }
+        HeldContainer heldContainer = nodeContainers.remove(nodeContainers.size() - 1);
+        MutableInt holdCount = rackToHoldNumber.get(heldContainer.getRack());
+        holdCount.decrement();
+        if (holdCount.intValue() >= 0) {
+          // rack can hold a container
+          containerCount++;
+          sessionContainers.add(heldContainer);
+        } else {
+          // rack limit reached. remove node.
+          iter.remove();
+        }
+      }
+    }
+
+    LOG.info("Identified {} session containers out of {} total containers",
+        sessionContainers.size(), heldContainers.size());
+  }
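+  // Worked example (hypothetical cluster): with sessionNumMinHeldContainers=3
+  // and launched containers on nodes n1,n2 (rack r1) and n3,n4 (rack r2), the
+  // rack pass budgets two holds to r1 and one to r2; the node pass then takes
+  // at most one container per node per round, e.g. retaining {n1, n2, n3}.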
+  @GuardedBy("this")
+  private void activateSessionContainers() {
+    if (!sessionContainers.isEmpty()) {
+      for (HeldContainer hc : sessionContainers) {
+        if (hc.isAssignable()) {
+          hc.scheduleForReuse(localitySchedulingDelay);
+        }
+      }
+      sessionContainers.clear();
+    }
+  }
+
+  @Override
+  public void onContainersCompleted(List<ContainerStatus> statuses) {
+    if (stopRequested) {
+      return;
+    }
+
+    List<TaskStatus> taskStatusList = new ArrayList<>(statuses.size());
+    synchronized (this) {
+      for (ContainerStatus status : statuses) {
+        ContainerId cid = status.getContainerId();
+        LOG.info("Container {} completed with status {}", cid, status);
+        Object task = releasedContainers.remove(cid);
+        if (task == null) {
+          HeldContainer hc = heldContainers.get(cid);
+          if (hc != null) {
+            task = containerCompleted(hc);
+          }
+        }
+        if (task != null) {
+          taskStatusList.add(new TaskStatus(task, status));
+        }
+      }
+    }
+
+    // perform app callback outside of locks
+    for (TaskStatus taskStatus : taskStatusList) {
+      getContext().containerCompleted(taskStatus.task, taskStatus.status);
+    }
+  }
+
+  @Override
+  public void onNodesUpdated(List<NodeReport> updatedNodes) {
+    if (!stopRequested) {
+      getContext().nodesUpdated(updatedNodes);
+    }
+  }
+
+  @Override
+  public float getProgress() {
+    if (stopRequested) {
+      return 1;
+    }
+
+    Collection<ContainerId> preemptedContainers;
+    synchronized (this) {
+      Resource freeResources = getAvailableResources();
+      if (totalResources.getMemory() == 0) {
+        // assume this is the first allocate callback. nothing is allocated.
+        // available resource = totalResource
+        // TODO this will not handle dynamic changes in resources
+        totalResources = Resources.clone(freeResources);
+        LOG.info("App total resource memory: {} cpu: {} activeAssignments: {}",
+            totalResources.getMemory(), totalResources.getVirtualCores(), taskAssignments.size());
+      }
+
+      ++numHeartbeats;
+      if (LOG.isDebugEnabled() || numHeartbeats % 50 == 1) {
+        LOG.debug(constructPeriodicLog(freeResources));
+      }
+
+      preemptedContainers = maybePreempt(freeResources);
+      if (preemptedContainers != null && !preemptedContainers.isEmpty()) {
+        lastPreemptionHeartbeat = numHeartbeats;
+      }
+    }
+
+    // perform app callback outside of locks
+    if (preemptedContainers != null && !preemptedContainers.isEmpty()) {
+      for (ContainerId cid : preemptedContainers) {
+        LOG.info("Preempting container {} currently allocated to a task", cid);
+        getContext().preemptContainer(cid);
+      }
+    }
+
+    return getContext().getProgress();
+  }
+
+  @Override
+  public void onShutdownRequest() {
+    if (!stopRequested) {
+      getContext().appShutdownRequested();
+    }
+  }
+
+  @Override
+  public void onError(Throwable e) {
+    LOG.error("Error from AMRMClient", e);
+    if (!stopRequested) {
+      getContext().reportError(YarnTaskSchedulerServiceError.RESOURCEMANAGER_ERROR,
+          StringUtils.stringifyException(e), null);
+    }
+  }
+
+  @Override
+  public Resource getAvailableResources() {
+    Resource resource = client.getAvailableResources();
+    return resource == null ?
Resource.newInstance(0, 0) : resource; + } + + @Override + public Resource getTotalResources() { + return totalResources; + } + + @Override + public int getClusterNodeCount() { + return client.getClusterNodeCount(); + } + + @Override + public synchronized void blacklistNode(NodeId nodeId) { + LOG.info("Blacklisting node: {}", nodeId); + blacklistedNodes.add(nodeId); + client.updateBlacklist(Collections.singletonList(nodeId.getHost()), null); + } + + @Override + public synchronized void unblacklistNode(NodeId nodeId) { + if (blacklistedNodes.remove(nodeId)) { + LOG.info("Removing blacklist for node: {}", nodeId); + client.updateBlacklist(null, Collections.singletonList(nodeId.getHost())); + } + } + + @Override + public void allocateTask(Object task, Resource capability, String[] hosts, String[] racks, + Priority priority, Object containerSignature, Object clientCookie) { + int vertexIndex = getContext().getVertexIndexForTask(task); + TaskRequest request = new TaskRequest(task, vertexIndex, capability, hosts, racks, + priority, containerSignature, clientCookie); + addTaskRequest(request); + } + + @Override + public void allocateTask(Object task, Resource capability, ContainerId containerId, + Priority priority, Object containerSignature, Object clientCookie) { + String[] hosts = null; + synchronized (this) { + HeldContainer held = heldContainers.get(containerId); + if (held != null) { + if (held.canFit(capability)) { + hosts = new String[]{held.getHost()}; + } else { + LOG.warn("Match request to container {} but {} does not fit in {}", + containerId, capability, held.getCapability()); + containerId = null; + } + } else { + LOG.info("Ignoring match request to unknown container {}", containerId); + containerId = null; + } + } + int vertexIndex = getContext().getVertexIndexForTask(task); + TaskRequest request = new TaskRequest(task, vertexIndex, capability, hosts, null, + priority, containerSignature, clientCookie, containerId); + addTaskRequest(request); + } + + @Override + public boolean deallocateTask(Object task, boolean taskSucceeded, + TaskAttemptEndReason endReason, String diagnostics) { + ContainerId releasedLaunchedContainer = null; + AMState appState = getContext().getAMState(); + boolean isSession = getContext().isSession(); + TaskRequest newAssignment = null; + HeldContainer hc; + synchronized (this) { + TaskRequest request = removeTaskRequest(task); + if (request != null) { + LOG.debug("Deallocating task {} before it was allocated", task); + return false; + } + + hc = removeTaskAssignment(task); + if (hc != null) { + if (taskSucceeded && shouldReuseContainers) { + idleTracker.add(hc); + newAssignment = tryAssignReuseContainer(hc, appState, isSession); + if (newAssignment == null && hc.isReleasedAndUsed()) { + releasedLaunchedContainer = hc.getId(); + } + } else { + if (releaseContainer(hc)) { + releasedLaunchedContainer = hc.getId(); + } + } + } + } + + // perform app callback outside of locks + if (newAssignment != null) { + informAppAboutAssignment(newAssignment, hc.getContainer()); + return true; + } + if (releasedLaunchedContainer != null) { + getContext().containerBeingReleased(releasedLaunchedContainer); + return true; + } + return hc != null; + } + + @Override + public Object deallocateContainer(ContainerId containerId) { + Object task = null; + ContainerId releasedLaunchedContainer = null; + synchronized (this) { + HeldContainer hc = heldContainers.remove(containerId); + if (hc != null) { + task = hc.getAssignedTask(); + if (task != null) { + LOG.info("Deallocated container 
{} from task {}", containerId, task); + } + if (releaseContainer(hc)) { + releasedLaunchedContainer = hc.getId(); + } + } else { + LOG.info("Ignoring deallocation of unknown container {}", containerId); + } + } + + // perform app callback outside of locks + if (releasedLaunchedContainer != null) { + getContext().containerBeingReleased(releasedLaunchedContainer); + } + return task; + } + + @GuardedBy("this") + private void assignContainer(TaskRequest request, HeldContainer hc, Object match) { + LOG.info("Assigning container {} to task {} host={} priority={} capability={} match={} lastTask={}", + hc.getId(), request.getTask(), hc.getHost(), hc.getPriority(), hc.getCapability(), match, hc.getLastTask()); + removeTaskRequest(request.getTask()); + addTaskAssignment(request, hc); + idleTracker.remove(hc); + } + + private synchronized boolean releaseContainer(HeldContainer hc) { + Object task = containerCompleted(hc); + client.releaseAssignedContainer(hc.getId()); + if (task != null) { + releasedContainers.put(hc.getId(), task); + return true; + } + return false; + } + + @GuardedBy("this") + private void addTaskAssignment(TaskRequest request, HeldContainer hc) { + HeldContainer oldContainer = taskAssignments.put(request.getTask(), hc); + if (oldContainer != null) { + LOG.error("Task {} being assigned to container {} but was already assigned to container {}", + request.getTask(), hc.getId(), oldContainer.getId()); + } + Integer vertexIndex = request.vertexIndex; + Set cset = vertexAssignments.get(vertexIndex); + if (cset == null) { + cset = new HashSet<>(); + vertexAssignments.put(vertexIndex, cset); + assignedVertices.set(vertexIndex); + } + cset.add(hc); + if (!hc.isNew()) { + getContext().containerReused(hc.getContainer()); + } + hc.assignTask(request); + } + + @GuardedBy("this") + private HeldContainer removeTaskAssignment(Object task) { + HeldContainer hc = taskAssignments.remove(task); + if (hc != null) { + TaskRequest request = hc.removeAssignment(); + if (request != null) { + Integer vertexIndex = request.vertexIndex; + Set cset = vertexAssignments.get(vertexIndex); + if (cset != null && cset.remove(hc) && cset.isEmpty()) { + vertexAssignments.remove(vertexIndex); + assignedVertices.clear(vertexIndex); + } + } else { + LOG.error("Container {} had assigned task {} but no request?!?", hc.getId(), task); + } + } + return hc; + } + + @GuardedBy("this") + @Nullable + private Object containerCompleted(HeldContainer hc) { + idleTracker.remove(hc); + heldContainers.remove(hc.getId()); + Resources.subtractFrom(allocatedResources, hc.getCapability()); + removeTaskAssignment(hc.getAssignedTask()); + hc.released(); + return hc.getLastTask(); + } + + @GuardedBy("this") + private void ensureVertexDescendants() { + if (vertexDescendants == null) { + DagInfo info = getContext().getCurrentDagInfo(); + if (info == null) { + throw new IllegalStateException("Scheduling tasks but no current DAG info?"); + } + int numVertices = info.getTotalVertices(); + ArrayList descendants = new ArrayList<>(numVertices); + for (int i = 0; i < numVertices; ++i) { + descendants.add(info.getVertexDescendants(i)); + } + vertexDescendants = descendants; + } + } + + private void addTaskRequest(TaskRequest request) { + Container assignedContainer = null; + synchronized (this) { + if (shouldReuseContainers && !stopRequested && getContext().getAMState() != AMState.COMPLETED) { + ensureVertexDescendants(); + activateSessionContainers(); + HeldContainer hc = tryAssignTaskToIdleContainer(request); + if (hc != null) { + assignedContainer 
= hc.getContainer(); + } + } + + if (assignedContainer == null) { + ensureVertexDescendants(); + TaskRequest old = requestTracker.add(request); + if (old != null) { + removeTaskRequestByRequest(request); + } + client.addContainerRequest(request); + + HeldContainer hc = heldContainers.get(request.getAffinity()); + if (hc != null) { + hc.addAffinity(request); + } + } + } + + // perform app callback outside of locks + if (assignedContainer != null) { + informAppAboutAssignment(request, assignedContainer); + } + } + + @Nullable + private synchronized TaskRequest removeTaskRequest(Object task) { + TaskRequest request = requestTracker.remove(task); + if (request != null) { + removeTaskRequestByRequest(request); + } + return request; + } + + @GuardedBy("this") + private void removeTaskRequestByRequest(TaskRequest request) { + client.removeContainerRequest(request); + HeldContainer hc = heldContainers.get(request.getAffinity()); + if (hc != null) { + hc.removeAffinity(request); + } + } + + @GuardedBy("this") + @Nullable + private HeldContainer tryAssignTaskToIdleContainer(TaskRequest request) { + if (requestTracker.isRequestBlocked(request)) { + LOG.debug("Cannot assign task {} to an idle container since vertex {} is a descendant of pending tasks", + request.getTask(), request.getVertexIndex()); + return null; + } + + // check if container affinity can be satisfied immediately + ContainerId affinity = request.getAffinity(); + if (affinity != null) { + HeldContainer hc = heldContainers.get(affinity); + if (hc != null && hc.isAssignable() && !maybeChangeNode(request, hc.getContainer().getNodeId())) { + assignContainer(request, hc, affinity); + return hc; + } + } + + // try to match the task against idle containers in order from best locality to worst + HeldContainer hc; + if (request.hasLocality()) { + hc = tryAssignTaskToIdleContainer(request, request.getNodes(), HeldContainerState.MATCHES_LOCAL_STATES); + if (hc == null) { + hc = tryAssignTaskToIdleContainer(request, request.getRacks(), HeldContainerState.MATCHES_RACK_STATES); + if (hc == null) { + hc = tryAssignTaskToIdleContainer(request, ResourceRequest.ANY, HeldContainerState.MATCHES_ANY_STATES); + } + } + } else { + hc = tryAssignTaskToIdleContainer(request, ResourceRequest.ANY, HeldContainerState.MATCHES_LOCAL_STATES); + } + + return hc; + } + + @GuardedBy("this") + @Nullable + private HeldContainer tryAssignTaskToIdleContainer(TaskRequest request, + List locations, EnumSet eligibleStates) { + if (locations != null && !locations.isEmpty()) { + for (String location : locations) { + HeldContainer hc = tryAssignTaskToIdleContainer(request, location, eligibleStates); + if (hc != null) { + return hc; + } + } + } + return null; + } + + @GuardedBy("this") + @Nullable + private HeldContainer tryAssignTaskToIdleContainer(TaskRequest request, + String location, EnumSet eligibleStates) { + Set containers = idleTracker.getByLocation(location); + HeldContainer bestMatch = null; + if (containers != null && !containers.isEmpty()) { + for (HeldContainer hc : containers) { + if (eligibleStates.contains(hc.getState())) { + Object csig = hc.getSignature(); + if (csig == null || signatureMatcher.isSuperSet(csig, request.getContainerSignature())) { + boolean needToChangeNode = maybeChangeNode(request, hc.getContainer().getNodeId()); + int numAffinities = hc.getNumAffinities(); + if (numAffinities == 0 && !needToChangeNode) { + bestMatch = hc; + break; + } + if ((bestMatch == null || numAffinities < bestMatch.getNumAffinities()) && !needToChangeNode) { + 
bestMatch = hc; + } + } else { + LOG.debug("Unable to assign task {} to container {} due to signature mismatch", request.getTask(), hc.getId()); + } + } + } + } + if (bestMatch != null) { + assignContainer(request, bestMatch, location); + } + return bestMatch; + } + + private boolean maybeChangeNode(TaskRequest request, NodeId nodeId) { + Object task = request.getTask(); + if (task instanceof TaskAttempt) { + Set nodesWithSiblingRunningAttempts = ((TaskAttempt) task).getTask().getNodesWithRunningAttempts(); + if (nodesWithSiblingRunningAttempts != null + && nodesWithSiblingRunningAttempts.contains(nodeId)) { + return true; + } + } + return false; + } + + @Override + public void setShouldUnregister() { + shouldUnregister = true; + } + + @Override + public boolean hasUnregistered() { + return hasUnregistered; + } + + @Override + public synchronized void dagComplete() { + for (HeldContainer hc : sessionContainers) { + hc.resetMatchingLevel(); + } + vertexDescendants = null; + } + + @GuardedBy("this") + @Nullable + private Collection maybePreempt(Resource freeResources) { + if (preemptionPercentage == 0 || numHeartbeats - lastPreemptionHeartbeat < numHeartbeatsBetweenPreemptions) { + return null; + } + if (!requestTracker.isPreemptionDeadlineExpired() && requestTracker.fitsHighestPriorityRequest(freeResources)) { + if (numHeartbeats % 50 == 1) { + LOG.info("Highest priority request fits in free resources {}", freeResources); + } + return null; + } + + int numIdleContainers = idleTracker.getNumContainers(); + if (numIdleContainers > 0) { + if (numHeartbeats % 50 == 1) { + LOG.info("Avoiding preemption since there are {} idle containers", numIdleContainers); + } + return null; + } + + BitSet blocked = requestTracker.createVertexBlockedSet(); + if (!blocked.intersects(assignedVertices)) { + if (numHeartbeats % 50 == 1) { + LOG.info("Avoiding preemption since there are no descendants of the highest priority requests running"); + } + return null; + } + + Resource preemptLeft = requestTracker.getAmountToPreempt(preemptionPercentage); + if (!resourceCalculator.anyAvailable(preemptLeft)) { + if (numHeartbeats % 50 == 1) { + LOG.info("Avoiding preemption since amount to preempt is {}", preemptLeft); + } + return null; + } + + PriorityQueue candidates = new PriorityQueue<>(11, PREEMPT_ORDER_COMPARATOR); + blocked.and(assignedVertices); + for (int i = blocked.nextSetBit(0); i >= 0; i = blocked.nextSetBit(i + 1)) { + Collection containers = vertexAssignments.get(i); + if (containers != null) { + candidates.addAll(containers); + } else { + LOG.error("Vertex {} in assignedVertices but no assignments?", i); + } + } + + ArrayList preemptedContainers = new ArrayList<>(); + HeldContainer hc; + while ((hc = candidates.poll()) != null) { + LOG.info("Preempting container {} currently allocated to task {}", hc.getId(), hc.getAssignedTask()); + preemptedContainers.add(hc.getId()); + resourceCalculator.deductFrom(preemptLeft, hc.getCapability()); + if (!resourceCalculator.anyAvailable(preemptLeft)) { + break; + } + } + + return preemptedContainers; + } + + @GuardedBy("this") + private String constructPeriodicLog(Resource freeResource) { + Priority highestPriority = requestTracker.getHighestPriority(); + return "Allocated: " + allocatedResources + + " Free: " + freeResource + + " pendingRequests: " + requestTracker.getNumRequests() + + " heldContainers: " + heldContainers.size() + + " heartbeats: " + numHeartbeats + + " lastPreemptionHeartbeat: " + lastPreemptionHeartbeat + + ((highestPriority != null) ? 
+ (" highestWaitingRequestWaitStartTime: " + requestTracker.getHighestPriorityWaitTimestamp() + + " highestWaitingRequestPriority: " + highestPriority) : ""); + } + + @VisibleForTesting + int getNumBlacklistedNodes() { + return blacklistedNodes.size(); + } + + @VisibleForTesting + Collection getSessionContainers() { + return sessionContainers; + } + + // Wrapper class to work around lack of blacklisting APIs in async client. + // This can be removed once Tez requires YARN >= 2.7.0 + static class AMRMClientAsyncWrapper extends AMRMClientAsyncImpl { + AMRMClientAsyncWrapper(AMRMClient syncClient, int intervalMs, CallbackHandler handler) { + super(syncClient, intervalMs, handler); + } + + public void updateBlacklist(List additions, List removals) { + client.updateBlacklist(additions, removals); + } + } + + /** + * A utility class to track a task allocation. + */ + static class TaskRequest extends AMRMClient.ContainerRequest { + final Object task; + final int vertexIndex; + final Object signature; + final Object cookie; + final ContainerId affinityContainerId; + + TaskRequest(Object task, int vertexIndex, Resource capability, String[] hosts, String[] racks, + Priority priority, Object signature, Object cookie) { + this(task, vertexIndex, capability, hosts, racks, priority, signature, cookie, null); + } + + TaskRequest(Object task, int vertexIndex, Resource capability, String[] hosts, String[] racks, + Priority priority, Object signature, Object cookie, ContainerId affinityContainerId) { + super(capability, hosts, racks, priority); + this.task = task; + this.vertexIndex = vertexIndex; + this.signature = signature; + this.cookie = cookie; + this.affinityContainerId = affinityContainerId; + } + + Object getTask() { + return task; + } + + int getVertexIndex() { + return vertexIndex; + } + + Object getContainerSignature() { + return signature; + } + + Object getCookie() { + return cookie; + } + + @Nullable + ContainerId getAffinity() { + return affinityContainerId; + } + + boolean hasLocality() { + List nodes = getNodes(); + List racks = getRacks(); + return (nodes != null && !nodes.isEmpty()) || (racks != null && !racks.isEmpty()); + } + } + + private enum HeldContainerState { + MATCHING_LOCAL(true), + MATCHING_RACK(true), + MATCHING_ANY(true), + ASSIGNED(false), + RELEASED(false); + + private static final EnumSet MATCHES_LOCAL_STATES = EnumSet.of( + HeldContainerState.MATCHING_LOCAL, HeldContainerState.MATCHING_RACK, HeldContainerState.MATCHING_ANY); + private static final EnumSet MATCHES_RACK_STATES = EnumSet.of( + HeldContainerState.MATCHING_RACK, HeldContainerState.MATCHING_ANY); + private static final EnumSet MATCHES_ANY_STATES = EnumSet.of(HeldContainerState.MATCHING_ANY); + + private final boolean assignable; + + HeldContainerState(boolean assignable) { + this.assignable = assignable; + } + + boolean isAssignable() { + return assignable; + } + } + + /** + * Tracking for an allocated container. + */ + @VisibleForTesting + class HeldContainer implements Callable { + final Container container; + final String rack; + @GuardedBy("DagAwareYarnTaskScheduler.this") + HeldContainerState state = HeldContainerState.MATCHING_LOCAL; + + /** The Future received when scheduling an idle container for re-allocation at a later time. */ + @GuardedBy("DagAwareYarnTaskScheduler.this") + Future future = null; + + /** The collection of task requests that have specified this container as a scheduling affinity. 
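+     * It is kept null when empty so that idle matching can cheaply prefer
+     * containers with no affinities (see getNumAffinities()).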
*/ + @GuardedBy("DagAwareYarnTaskScheduler.this") + Collection affinities = null; + + /** + * The task request corresponding to the currently assigned task to this container. + * This field is null when the container is not currently assigned. + */ + @GuardedBy("DagAwareYarnTaskScheduler.this") + TaskRequest assignedRequest = null; + + /** The task request corresponding to the last task that was assigned to this container. */ + @GuardedBy("DagAwareYarnTaskScheduler.this") + TaskRequest lastRequest = null; + + /** The timestamp when the idle container will expire. 0 if the container is not idle. */ + @GuardedBy("DagAwareYarnTaskScheduler.this") + long idleExpirationTimestamp = 0; + + /** The timestamp when this container was assigned. 0 if the container is not assigned. */ + @GuardedBy("DagAwareYarnTaskScheduler.this") + long assignmentTimestamp = 0; + + HeldContainer(Container container) { + this.container = container; + this.rack = RackResolver.resolve(container.getNodeId().getHost()).getNetworkLocation(); + } + + HeldContainerState getState() { + return state; + } + + boolean isAssignable() { + return state.isAssignable(); + } + + boolean isReleasedAndUsed() { + return state == HeldContainerState.RELEASED && getLastTask() != null; + } + + Container getContainer() { + return container; + } + + ContainerId getId() { + return container.getId(); + } + + String getHost() { + return container.getNodeId().getHost(); + } + + String getRack() { + return rack; + } + + Priority getPriority() { + return container.getPriority(); + } + + Resource getCapability() { + return container.getResource(); + } + + @Nullable + Object getAssignedTask() { + return assignedRequest != null ? assignedRequest.getTask() : null; + } + + void assignTask(TaskRequest request) { + assert state != HeldContainerState.ASSIGNED && state != HeldContainerState.RELEASED; + if (assignedRequest != null) { + LOG.error("Container {} assigned task {} but already running task {}", + getId(), request.getTask(), assignedRequest.getTask()); + } + assignedRequest = request; + lastRequest = request; + state = HeldContainerState.ASSIGNED; + idleExpirationTimestamp = 0; + assignmentTimestamp = now(); + if (future != null) { + future.cancel(false); + future = null; + } + } + + TaskRequest removeAssignment() { + assert state == HeldContainerState.ASSIGNED; + TaskRequest result = assignedRequest; + assignedRequest = null; + assignmentTimestamp = 0; + state = HeldContainerState.MATCHING_LOCAL; + return result; + } + + void addAffinity(TaskRequest request) { + if (affinities == null) { + affinities = new HashSet<>(); + } + affinities.add(request); + } + + void removeAffinity(TaskRequest request) { + if (affinities != null && affinities.remove(request) && affinities.isEmpty()) { + affinities = null; + } + } + + int getNumAffinities() { + return affinities != null ? affinities.size() : 0; + } + + @Nullable + Collection getAffinities() { + return affinities; + } + + void scheduleForReuse(long delayMillis) { + assert state != HeldContainerState.ASSIGNED && state != HeldContainerState.RELEASED; + try { + if (future != null) { + future.cancel(false); + } + future = reuseExecutor.schedule(this, delayMillis, TimeUnit.MILLISECONDS); + } catch (RejectedExecutionException e) { + if (!stopRequested) { + LOG.error("Container {} could not be scheduled for reuse!", getId(), e); + } + } + } + + @Nullable + Object getSignature() { + return lastRequest != null ? 
lastRequest.getContainerSignature() : null; + } + + @Nullable + Object getLastTask() { + return lastRequest != null ? lastRequest.getTask() : null; + } + + boolean isNew() { + return lastRequest == null; + } + + String getMatchingLocation() { + switch (state) { + case MATCHING_LOCAL: + return getHost(); + case MATCHING_RACK: + return getRack(); + case MATCHING_ANY: + return ResourceRequest.ANY; + default: + throw new IllegalStateException("Container " + getId() + " trying to match in state " + state); + } + } + + void moveToNextMatchingLevel() { + switch (state) { + case MATCHING_LOCAL: + if (reuseRackLocal) { + state = HeldContainerState.MATCHING_RACK; + } + break; + case MATCHING_RACK: + if (reuseNonLocal) { + state = HeldContainerState.MATCHING_ANY; + } + break; + case MATCHING_ANY: + break; + default: + throw new IllegalStateException("Container " + getId() + " trying to match in state " + state); + } + } + + boolean atMaxMatchLevel() { + switch (state) { + case MATCHING_LOCAL: + return !reuseRackLocal; + case MATCHING_RACK: + return !reuseNonLocal; + case MATCHING_ANY: + return true; + default: + throw new IllegalStateException("Container " + getId() + " trying to match in state " + state); + } + } + + void resetMatchingLevel() { + if (isAssignable()) { + state = HeldContainerState.MATCHING_LOCAL; + } + } + + long getIdleExpirationTimestamp(long now) { + if (idleExpirationTimestamp == 0) { + if (idleContainerTimeoutMin > 0) { + idleExpirationTimestamp = now + (idleContainerTimeoutMin == idleContainerTimeoutMax ? idleContainerTimeoutMin + : ThreadLocalRandom.current().nextLong(idleContainerTimeoutMin, idleContainerTimeoutMax)); + } else { + idleExpirationTimestamp = Long.MAX_VALUE; + } + } + return idleExpirationTimestamp; + } + + long getAssignmentTimestamp() { + return assignmentTimestamp; + } + + boolean canFit(Resource capability) { + Resource cr = container.getResource(); + return cr.getMemory() >= capability.getMemory() && cr.getVirtualCores() >= capability.getVirtualCores(); + } + + @Override + public Void call() throws Exception { + AMState appState = getContext().getAMState(); + boolean isSession = getContext().isSession(); + TaskRequest assigned = null; + ContainerId released = null; + synchronized (DagAwareYarnTaskScheduler.this) { + future = null; + if (isAssignable()) { + moveToNextMatchingLevel(); + assigned = tryAssignReuseContainer(this, appState, isSession); + if (assigned == null && isReleasedAndUsed()) { + released = getId(); + } + } + } + if (assigned != null) { + informAppAboutAssignment(assigned, container); + } + if (released != null) { + getContext().containerBeingReleased(released); + } + return null; + } + + void released() { + assert state != HeldContainerState.RELEASED; + state = HeldContainerState.RELEASED; + if (future != null) { + future.cancel(false); + } + future = null; + } + } + + /** + * Utility comparator to order containers by assignment timestamp from + * most recent to least recent. 
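+   * Containers with no assignment timestamp sort as if most recent, so they
+   * are offered up for preemption first.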
+ */ + private static class PreemptOrderComparator implements Comparator { + @Override + public int compare(HeldContainer o1, HeldContainer o2) { + long timestamp1 = o1.getAssignmentTimestamp(); + if (timestamp1 == 0) { + timestamp1 = Long.MAX_VALUE; + } + long timestamp2 = o2.getAssignmentTimestamp(); + if (timestamp2 == 0) { + timestamp2 = Long.MAX_VALUE; + } + return Long.compare(timestamp2, timestamp1); + } + } + + /** + * Utility class for a request, container pair + */ + private static class Assignment { + final TaskRequest request; + final Container container; + + Assignment(TaskRequest request, Container container) { + this.request = request; + this.container = container; + } + } + + /** + * Utility class for a task, container exit status pair + */ + private static class TaskStatus { + final Object task; + final ContainerStatus status; + + TaskStatus(Object task, ContainerStatus status) { + this.task = task; + this.status = status; + } + } + + /** + * The task allocation request tracker tracks task allocations + * and keeps statistics on which priorities have requests and which vertices + * should be blocked from container reuse due to DAG topology. + */ + private class RequestTracker { + private final Map requests = new HashMap<>(); + /** request map ordered by priority with highest priority first */ + private final NavigableMap priorityStats = + new TreeMap<>(Collections.reverseOrder()); + private Priority highestPriority = null; + private long highestPriorityWaitTimestamp = 0; + + @GuardedBy("DagAwareYarnTaskScheduler.this") + @Nullable + TaskRequest add(TaskRequest request) { + TaskRequest oldRequest = requests.put(request.getTask(), request); + Priority priority = request.getPriority(); + RequestPriorityStats stats = priorityStats.get(priority); + if (stats == null) { + stats = addStatsForPriority(priority); + } + ++stats.requestCount; + if (request.hasLocality()) { + ++stats.localityCount; + } + incrVertexTaskCount(priority, stats, request.getVertexIndex()); + + if (oldRequest != null) { + updateStatsForRemoval(oldRequest); + } + return oldRequest; + } + + @GuardedBy("DagAwareYarnTaskScheduler.this") + @Nullable + TaskRequest remove(Object task) { + TaskRequest request = requests.remove(task); + if (request != null) { + updateStatsForRemoval(request); + return request; + } + return null; + } + + private RequestPriorityStats addStatsForPriority(Priority priority) { + BitSet allowedVerts = new BitSet(vertexDescendants.size()); + Entry lowerEntry = priorityStats.lowerEntry(priority); + if (lowerEntry != null) { + // initialize the allowed vertices BitSet using the information derived + // from the next higher priority entry + RequestPriorityStats priorStats = lowerEntry.getValue(); + allowedVerts.or(priorStats.allowedVertices); + allowedVerts.andNot(priorStats.descendants); + } else { + // no higher priority entry so this priority is currently the highest + highestPriority = priority; + highestPriorityWaitTimestamp = now(); + allowedVerts.set(0, vertexDescendants.size()); + } + RequestPriorityStats stats = new RequestPriorityStats(vertexDescendants.size(), allowedVerts); + priorityStats.put(priority, stats); + return stats; + } + + private void updateStatsForRemoval(TaskRequest request) { + Priority priority = request.getPriority(); + RequestPriorityStats stats = priorityStats.get(priority); + decrVertexTaskCount(priority, stats, request.getVertexIndex()); + --stats.requestCount; + if (request.hasLocality()) { + --stats.localityCount; + } + if (stats.requestCount == 0) { + 
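+        // When the last request at a priority drains, the highest-priority
+        // wait clock below is restarted; isPreemptionDeadlineExpired()
+        // measures starvation against this timestamp.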
priorityStats.remove(priority); + if (highestPriority.equals(priority)) { + if (priorityStats.isEmpty()) { + highestPriority = null; + highestPriorityWaitTimestamp = 0; + } else { + highestPriority = priorityStats.firstKey(); + highestPriorityWaitTimestamp = now(); + } + } + } + } + + @GuardedBy("DagAwareYarnTaskScheduler.this") + boolean isEmpty() { + return requests.isEmpty(); + } + + @GuardedBy("DagAwareYarnTaskScheduler.this") + int getNumRequests() { + return requests.size(); + } + + @GuardedBy("DagAwareYarnTaskScheduler.this") + List getTasks() { + return new ArrayList<>(requests.keySet()); + } + + @GuardedBy("DagAwareYarnTaskScheduler.this") + Collection> getStatsEntries() { + return priorityStats.entrySet(); + } + + @GuardedBy("DagAwareYarnTaskScheduler.this") + @Nullable + Priority getHighestPriority() { + if (priorityStats.isEmpty()) { + return null; + } + return priorityStats.firstKey(); + } + + @GuardedBy("DagAwareYarnTaskScheduler.this") + long getHighestPriorityWaitTimestamp() { + return highestPriorityWaitTimestamp; + } + + @GuardedBy("DagAwareYarnTaskScheduler.this") + boolean isRequestBlocked(TaskRequest request) { + Entry entry = priorityStats.floorEntry(request.getPriority()); + if (entry != null) { + RequestPriorityStats stats = entry.getValue(); + int vertexIndex = request.getVertexIndex(); + return !stats.allowedVertices.get(vertexIndex) || stats.descendants.get(vertexIndex); + } + return false; + } + + private void incrVertexTaskCount(Priority priority, RequestPriorityStats stats, int vertexIndex) { + Integer vertexIndexInt = vertexIndex; + MutableInt taskCount = stats.vertexTaskCount.get(vertexIndexInt); + if (taskCount != null) { + taskCount.increment(); + } else { + addVertexToRequestStats(priority, stats, vertexIndexInt); + } + } + + private void decrVertexTaskCount(Priority priority, RequestPriorityStats stats, int vertexIndex) { + Integer vertexIndexInt = vertexIndex; + MutableInt taskCount = stats.vertexTaskCount.get(vertexIndexInt); + taskCount.decrement(); + if (taskCount.intValue() <= 0) { + removeVertexFromRequestStats(priority, stats, vertexIndexInt); + } + } + + /** + * Add a new vertex to a RequestPriorityStats. + * + * Adding a vertex to the request stats requires updating the stats descendants bitmask to include the descendants + * of the new vertex and also updating the allowedVertices bitmask for all lower priority requests to prevent any + * task request from a descendant vertex in the DAG from being allocated. This avoids assigning allocations to + * lower priority requests when a higher priority request of an ancestor is still pending, but it allows lower + * priority requests to be satisfied if higher priority requests are not ancestors. This is particularly useful + * for DAGs that have independent trees of vertices or significant, parallel branches within a tree. + * + * Requests are blocked by taking the specified vertex's full descendant vertex bitmask in vertexDescendants and + * clearing those bits for all lower priority requests. For the following example DAG where each vertex index + * corresponds to its letter position (i.e.: A=0, B=1, C=2, etc.) + * + * A + * | + * C---B----E + * | | + * D F + * | + * G---H + * + * Vertices F, G, and H are descendants of E but all other vertices are not. The vertexDescendants bitmask for + * vertex E is therefore 11100000b or 0xE0. When the first vertex E task request arrives we need to disallow + * requests for all descendants of E. 
That is accomplished by iterating through the request stats for all lower + * priority requests and clearing the allowedVertex bits corresponding to the descendants, + * i.e: allowedVertices = allowedVertices & ~descendants + */ + private void addVertexToRequestStats(Priority priority, RequestPriorityStats stats, Integer vertexIndexInt) { + // Creating a new vertex entry for this priority, so the allowed vertices for all + // lower priorities need to be updated based on the descendants of the new vertex. + stats.vertexTaskCount.put(vertexIndexInt, new MutableInt(1)); + int vertexIndex = vertexIndexInt; + stats.vertices.set(vertexIndex); + BitSet d = vertexDescendants.get(vertexIndex); + stats.descendants.or(d); + for (RequestPriorityStats lowerStat : priorityStats.tailMap(priority, false).values()) { + lowerStat.allowedVertices.andNot(d); + } + } + + /** + * Removes a vertex from a RequestPriorityStats. + * + * Removing a vertex is more expensive than adding a vertex. The stats contain bitmasks which only store on/off + * values rather than reference counts. Therefore we must rebuild the descendants bitmasks from the remaining + * vertices in the request stats. Once the new descendants mask is computed we then need to rebuild the + * allowedVertices BitSet for all lower priority request stats in case the removal of this vertex unblocks lower + * priority requests of a descendant vertex. + * + * Rebuilding allowedVertices for the lower priorities involves starting with the allowedVertices mask at the + * current priority then masking off the descendants at each priority level encountered, accumulating the results. + * Any descendants of a level will be blocked at all lower levels. See the addVertexToRequestStats documentation + * for details on how vertices map to the descendants and allowedVertices bit masks. + */ + private void removeVertexFromRequestStats(Priority priority, RequestPriorityStats stats, Integer vertexIndexInt) { + stats.vertexTaskCount.remove(vertexIndexInt); + int vertexIndex = vertexIndexInt; + stats.vertices.clear(vertexIndex); + + // Rebuild the descendants BitSet for the remaining vertices at this priority. + stats.descendants.clear(); + for (Integer vIndex : stats.vertexTaskCount.keySet()) { + stats.descendants.or(vertexDescendants.get(vIndex)); + } + + // The allowedVertices for all lower priorities need to be recalculated where the vertex descendants at each + // level are removed from the list of allowed vertices at all subsequent levels. 
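+      // Worked example (hypothetical priorities p1 > p2 > p3): after a removal
+      // at p1, start from p1's allowedVertices minus p1's remaining
+      // descendants; that becomes p2's allowedVertices, and the running set
+      // minus p2's descendants then becomes p3's, cascading through the tail map.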
+      Collection<RequestPriorityStats> tailStats = priorityStats.tailMap(priority, false).values();
+      if (!tailStats.isEmpty()) {
+        BitSet cumulativeAllowed = new BitSet(vertexDescendants.size());
+        cumulativeAllowed.or(stats.allowedVertices);
+        cumulativeAllowed.andNot(stats.descendants);
+        for (RequestPriorityStats s : tailStats) {
+          s.allowedVertices.clear();
+          s.allowedVertices.or(cumulativeAllowed);
+          cumulativeAllowed.andNot(s.descendants);
+        }
+      }
+    }
+
+    @GuardedBy("DagAwareYarnTaskScheduler.this")
+    boolean isPreemptionDeadlineExpired() {
+      return highestPriorityWaitTimestamp != 0
+          && now() - highestPriorityWaitTimestamp > preemptionMaxWaitTime;
+    }
+
+    @GuardedBy("DagAwareYarnTaskScheduler.this")
+    boolean fitsHighestPriorityRequest(Resource freeResources) {
+      if (priorityStats.isEmpty()) {
+        return true;
+      }
+      Priority priority = priorityStats.firstKey();
+      List<? extends Collection<TaskRequest>> requestsList = client.getMatchingRequests(
+          priority, ResourceRequest.ANY, freeResources);
+      return !requestsList.isEmpty();
+    }
+
+    @GuardedBy("DagAwareYarnTaskScheduler.this")
+    Resource getAmountToPreempt(int preemptionPercentage) {
+      if (priorityStats.isEmpty()) {
+        return Resources.none();
+      }
+      Priority priority = priorityStats.firstKey();
+      List<? extends Collection<TaskRequest>> requestsList = client.getMatchingRequests(
+          priority, ResourceRequest.ANY, Resources.unbounded());
+      int numRequests = 0;
+      for (Collection<TaskRequest> requests : requestsList) {
+        numRequests += requests.size();
+      }
+      numRequests = (int) Math.ceil(numRequests * (preemptionPercentage / 100.f));
+      Resource toPreempt = Resource.newInstance(0, 0);
+      if (numRequests != 0) {
+        outer_loop:
+        for (Collection<TaskRequest> requests : requestsList) {
+          for (TaskRequest request : requests) {
+            Resources.addTo(toPreempt, request.getCapability());
+            if (--numRequests == 0) {
+              break outer_loop;
+            }
+          }
+        }
+      }
+      return toPreempt;
+    }
+
+    // Create a new BitSet that represents all of the vertices that should not be
+    // scheduled due to outstanding requests from higher priority predecessor vertices.
+    @GuardedBy("DagAwareYarnTaskScheduler.this")
+    BitSet createVertexBlockedSet() {
+      BitSet blocked = new BitSet(vertexDescendants.size());
+      Entry<Priority, RequestPriorityStats> entry = priorityStats.lastEntry();
+      if (entry != null) {
+        RequestPriorityStats stats = entry.getValue();
+        blocked.or(stats.allowedVertices);
+        blocked.flip(0, blocked.size());
+        blocked.or(stats.descendants);
+      }
+      return blocked;
+    }
+  }
+
+  /**
+   * Tracks statistics on vertices that are requesting tasks at a particular priority
+   */
+  private static class RequestPriorityStats {
+    /** Map from vertex ID to number of task requests for that vertex */
+    final Map<Integer, MutableInt> vertexTaskCount = new HashMap<>();
+    /** BitSet of vertices that have outstanding requests at this priority */
+    final BitSet vertices;
+    /** BitSet of vertices that are descendants of the vertices requesting at this priority */
+    final BitSet descendants;
+    /**
+     * BitSet of vertices that are allowed to be scheduled at this priority
+     * (i.e.: no outstanding predecessors requesting at higher priorities)
+     */
+    final BitSet allowedVertices;
+    int requestCount = 0;
+    int localityCount = 0;
+
+    RequestPriorityStats(int numTotalVertices, BitSet allowedVertices) {
+      this.vertices = new BitSet(numTotalVertices);
+      this.descendants = new BitSet(numTotalVertices);
+      this.allowedVertices = allowedVertices;
+    }
+  }
+
+  /**
+   * Tracks idle containers and facilitates faster matching of task requests
+   * against those containers given a desired location.
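+   * For example (hypothetical host/rack names), an idle container on host h1
+   * in rack r1 is indexed under the keys "h1", "r1", and ResourceRequest.ANY,
+   * so a request with any of those localities finds it with one map lookup.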
+ */ + private static class IdleContainerTracker { + /** + * Map of location ID (e.g.: a specific host, rack, or ANY) to set of + * idle containers matching that location + */ + final Map> containersByLocation = new HashMap<>(); + int numContainers = 0; + + @GuardedBy("DagAwareYarnTaskScheduler.this") + void add(HeldContainer hc) { + add(hc, hc.getHost()); + add(hc, hc.getRack()); + add(hc, ResourceRequest.ANY); + ++numContainers; + } + + @GuardedBy("DagAwareYarnTaskScheduler.this") + void remove(HeldContainer hc) { + remove(hc, hc.getHost()); + remove(hc, hc.getRack()); + remove(hc, ResourceRequest.ANY); + --numContainers; + } + + @GuardedBy("DagAwareYarnTaskScheduler.this") + int getNumContainers() { + return numContainers; + } + + private void add(HeldContainer hc, String location) { + Set containers = containersByLocation.get(location); + if (containers == null) { + containers = new HashSet<>(); + containersByLocation.put(location, containers); + } + containers.add(hc); + } + + private void remove(HeldContainer hc, String location) { + Set containers = containersByLocation.get(location); + if (containers != null) { + if (containers.remove(hc) && containers.isEmpty()) { + containersByLocation.remove(location); + } + } + } + + @GuardedBy("DagAwareYarnTaskScheduler.this") + @Nullable + Set getByLocation(String location) { + return containersByLocation.get(location); + } + } + + private interface ResourceCalculator { + boolean anyAvailable(Resource rsrc); + void deductFrom(Resource total, Resource toSubtract); + } + + /** + * ResourceCalculator for memory-only allocation + */ + private static class MemResourceCalculator implements ResourceCalculator { + + @Override + public boolean anyAvailable(Resource rsrc) { + return rsrc.getMemory() > 0; + } + + @Override + public void deductFrom(Resource total, Resource toSubtract) { + total.setMemory(total.getMemory() - toSubtract.getMemory()); + } + } + + /** + * ResourceCalculator for memory and vcore allocation + */ + private static class MemCpuResourceCalculator extends MemResourceCalculator { + + @Override + public boolean anyAvailable(Resource rsrc) { + return super.anyAvailable(rsrc) || rsrc.getVirtualCores() > 0; + } + + @Override + public void deductFrom(Resource total, Resource toSubtract) { + super.deductFrom(total, toSubtract); + total.setVirtualCores(total.getVirtualCores() - toSubtract.getVirtualCores()); + } + } + + /** + * Scheduled thread pool executor that logs any errors that escape the worker thread. + * This can be replaced with HadoopThreadPoolExecutor once Tez requires Hadoop 2.8 or later. 
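+   * afterExecute() retrieves the result of each completed Future so that
+   * exceptions thrown by scheduled reuse callbacks are logged rather than
+   * silently dropped.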
+ */ + static class ReuseContainerExecutor extends ScheduledThreadPoolExecutor { + ReuseContainerExecutor() { + super(1, new ThreadFactoryBuilder().setNameFormat("ReuseContainerExecutor #%d").build()); + setRemoveOnCancelPolicy(true); + setExecuteExistingDelayedTasksAfterShutdownPolicy(false); + } + + @Override + protected void afterExecute(Runnable r, Throwable t) { + super.afterExecute(r, t); + + if (t == null && r instanceof Future) { + try { + ((Future) r).get(); + } catch (ExecutionException ee) { + LOG.warn("Execution exception when running task in {}", Thread.currentThread().getName()); + t = ee.getCause(); + } catch (InterruptedException ie) { + LOG.warn("Thread ({}) interrupted: ", Thread.currentThread(), ie); + Thread.currentThread().interrupt(); + } catch (Throwable throwable) { + t = throwable; + } + } + + if (t != null) { + LOG.warn("Caught exception in thread {}", Thread.currentThread().getName(), t); + } + } + } + + @Override + public int getHeldContainersCount() { + return heldContainers.size(); + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java index 7dabb73aa0..20f37119d9 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/LocalTaskSchedulerService.java @@ -19,8 +19,11 @@ package org.apache.tez.dag.app.rm; import java.io.IOException; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Map; +import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.BlockingQueue; import java.util.concurrent.PriorityBlockingQueue; import java.util.HashMap; import java.util.Iterator; @@ -29,6 +32,7 @@ import com.google.common.primitives.Ints; import org.apache.tez.common.TezUtils; +import org.apache.tez.serviceplugins.api.DagInfo; import org.apache.tez.serviceplugins.api.TaskScheduler; import org.apache.tez.serviceplugins.api.TaskSchedulerContext; import org.slf4j.Logger; @@ -51,19 +55,19 @@ public class LocalTaskSchedulerService extends TaskScheduler { private static final Logger LOG = LoggerFactory.getLogger(LocalTaskSchedulerService.class); final ContainerSignatureMatcher containerSignatureMatcher; - final PriorityBlockingQueue taskRequestQueue; + final LinkedBlockingQueue taskRequestQueue; final Configuration conf; AsyncDelegateRequestHandler taskRequestHandler; Thread asyncDelegateRequestThread; - final HashMap taskAllocations; + final HashMap taskAllocations; final String appTrackingUrl; final long customContainerAppId; public LocalTaskSchedulerService(TaskSchedulerContext taskSchedulerContext) { super(taskSchedulerContext); - taskRequestQueue = new PriorityBlockingQueue(); - taskAllocations = new LinkedHashMap(); + taskRequestQueue = new LinkedBlockingQueue<>(); + taskAllocations = new LinkedHashMap<>(); this.appTrackingUrl = taskSchedulerContext.getAppTrackingUrl(); this.containerSignatureMatcher = taskSchedulerContext.getContainerSignatureMatcher(); this.customContainerAppId = taskSchedulerContext.getCustomClusterIdentifier(); @@ -98,6 +102,7 @@ public int getClusterNodeCount() { @Override public void dagComplete() { + taskRequestHandler.dagComplete(); } @Override @@ -129,7 +134,7 @@ public synchronized void allocateTask(Object task, Resource capability, // in local mode every task is already container level local taskRequestHandler.addAllocateTaskRequest(task, capability, 
priority, clientCookie); } - + @Override public boolean deallocateTask(Object task, boolean taskSucceeded, TaskAttemptEndReason endReason, String diagnostics) { return taskRequestHandler.addDeallocateTaskRequest(task); @@ -137,6 +142,7 @@ public boolean deallocateTask(Object task, boolean taskSucceeded, TaskAttemptEnd @Override public Object deallocateContainer(ContainerId containerId) { + taskRequestHandler.addDeallocateContainerRequest(containerId); return null; } @@ -144,6 +150,7 @@ public Object deallocateContainer(ContainerId containerId) { public void initialize() { taskRequestHandler = createRequestHandler(conf); asyncDelegateRequestThread = new Thread(taskRequestHandler); + asyncDelegateRequestThread.setName(LocalTaskSchedulerService.class.getSimpleName() + "RequestHandler"); asyncDelegateRequestThread.setDaemon(true); } @@ -211,20 +218,14 @@ public Container createContainer(Resource capability, Priority priority) { } } - static class TaskRequest implements Comparable<TaskRequest> { - // Higher prority than Priority.UNDEFINED - static final int HIGHEST_PRIORITY = -2; - Object task; - Priority priority; + static class SchedulerRequest { + } - public TaskRequest(Object task, Priority priority) { - this.task = task; - this.priority = priority; - } + static class TaskRequest extends SchedulerRequest { + final Object task; - @Override - public int compareTo(TaskRequest request) { - return request.priority.compareTo(this.priority); + public TaskRequest(Object task) { + this.task = task; } @Override @@ -238,9 +239,6 @@ public boolean equals(Object o) { TaskRequest that = (TaskRequest) o; - if (priority != null ? !priority.equals(that.priority) : that.priority != null) { - return false; - } if (task != null ? !task.equals(that.task) : that.task != null) { return false; } @@ -250,23 +248,29 @@ @Override public int hashCode() { - int result = 1; - result = 7841 * result + (task != null ? task.hashCode() : 0); - result = 7841 * result + (priority != null ? priority.hashCode() : 0); - return result; + return 7841 + (task != null ? task.hashCode() : 0); } } - static class AllocateTaskRequest extends TaskRequest { - Resource capability; - Object clientCookie; + static class AllocateTaskRequest extends TaskRequest implements Comparable<AllocateTaskRequest> { + final Priority priority; + final Resource capability; + final Object clientCookie; + final int vertexIndex; - public AllocateTaskRequest(Object task, Resource capability, Priority priority, - Object clientCookie) { - super(task, priority); + public AllocateTaskRequest(Object task, int vertexIndex, Resource capability, Priority priority, + Object clientCookie) { + super(task); + this.priority = priority; this.capability = capability; this.clientCookie = clientCookie; + this.vertexIndex = vertexIndex; + } + + @Override + public int compareTo(AllocateTaskRequest request) { + return request.priority.compareTo(this.priority); } @Override @@ -283,6 +287,10 @@ public boolean equals(Object o) { AllocateTaskRequest that = (AllocateTaskRequest) o; + if (priority != null ? !priority.equals(that.priority) : that.priority != null) { + return false; + } + if (capability != null ? !capability.equals(that.capability) : that.capability != null) { return false; } @@ -297,6 +305,7 @@ public boolean equals(Object o) { @Override public int hashCode() { int result = super.hashCode(); + result = 12329 * result + (priority != null ? priority.hashCode() : 0); result = 12329 * result + (capability != null ? 
capability.hashCode() : 0); result = 12329 * result + (clientCookie != null ? clientCookie.hashCode() : 0); return result; @@ -304,37 +313,81 @@ public int hashCode() { } static class DeallocateTaskRequest extends TaskRequest { - static final Priority DEALLOCATE_PRIORITY = Priority.newInstance(HIGHEST_PRIORITY); public DeallocateTaskRequest(Object task) { - super(task, DEALLOCATE_PRIORITY); + super(task); + } + } + + static class DeallocateContainerRequest extends SchedulerRequest { + final ContainerId containerId; + + public DeallocateContainerRequest(ContainerId containerId) { + this.containerId = containerId; + } + } + + static class AllocatedTask { + final AllocateTaskRequest request; + final Container container; + + AllocatedTask(AllocateTaskRequest request, Container container) { + this.request = request; + this.container = container; } } static class AsyncDelegateRequestHandler implements Runnable { - final BlockingQueue<TaskRequest> taskRequestQueue; + final LinkedBlockingQueue<SchedulerRequest> clientRequestQueue; + final PriorityBlockingQueue<AllocateTaskRequest> taskRequestQueue; final LocalContainerFactory localContainerFactory; - final HashMap<Object, Container> taskAllocations; + final HashMap<Object, AllocatedTask> taskAllocations; final TaskSchedulerContext taskSchedulerContext; + private final Object descendantsLock = new Object(); + private ArrayList<BitSet> vertexDescendants = null; final int MAX_TASKS; - AsyncDelegateRequestHandler(BlockingQueue<TaskRequest> taskRequestQueue, + AsyncDelegateRequestHandler(LinkedBlockingQueue<SchedulerRequest> clientRequestQueue, LocalContainerFactory localContainerFactory, - HashMap<Object, Container> taskAllocations, + HashMap<Object, AllocatedTask> taskAllocations, TaskSchedulerContext taskSchedulerContext, Configuration conf) { - this.taskRequestQueue = taskRequestQueue; + this.clientRequestQueue = clientRequestQueue; this.localContainerFactory = localContainerFactory; this.taskAllocations = taskAllocations; this.taskSchedulerContext = taskSchedulerContext; this.MAX_TASKS = conf.getInt(TezConfiguration.TEZ_AM_INLINE_TASK_EXECUTION_MAX_TASKS, TezConfiguration.TEZ_AM_INLINE_TASK_EXECUTION_MAX_TASKS_DEFAULT); + this.taskRequestQueue = new PriorityBlockingQueue<>(); + } + + void dagComplete() { + synchronized (descendantsLock) { + vertexDescendants = null; + } + } + private void ensureVertexDescendants() { + synchronized (descendantsLock) { + if (vertexDescendants == null) { + DagInfo info = taskSchedulerContext.getCurrentDagInfo(); + if (info == null) { + throw new IllegalStateException("Scheduling tasks but no current DAG info?"); + } + int numVertices = info.getTotalVertices(); + ArrayList<BitSet> descendants = new ArrayList<>(numVertices); + for (int i = 0; i < numVertices; ++i) { + descendants.add(info.getVertexDescendants(i)); + } + vertexDescendants = descendants; + } + } } public void addAllocateTaskRequest(Object task, Resource capability, Priority priority, Object clientCookie) { try { - taskRequestQueue.put(new AllocateTaskRequest(task, capability, priority, clientCookie)); + int vertexIndex = taskSchedulerContext.getVertexIndexForTask(task); + clientRequestQueue.put(new AllocateTaskRequest(task, vertexIndex, capability, priority, clientCookie)); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } @@ -342,80 +395,127 @@ public void addAllocateTaskRequest(Object task, Resource capability, Priority pr public boolean addDeallocateTaskRequest(Object task) { try { - taskRequestQueue.put(new DeallocateTaskRequest(task)); + clientRequestQueue.put(new DeallocateTaskRequest(task)); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } - synchronized(taskRequestQueue) { - 
taskRequestQueue.notify(); - } return true; } - boolean shouldWait() { - return taskAllocations.size() >= MAX_TASKS; + public void addDeallocateContainerRequest(ContainerId containerId) { + try { + clientRequestQueue.put(new DeallocateContainerRequest(containerId)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + boolean shouldProcess() { + return !taskRequestQueue.isEmpty() && taskAllocations.size() < MAX_TASKS; + } + + boolean shouldPreempt() { + return !taskRequestQueue.isEmpty() && taskAllocations.size() >= MAX_TASKS; } @Override public void run() { - while(!Thread.currentThread().isInterrupted()) { - synchronized(taskRequestQueue) { - try { - if (shouldWait()) { - taskRequestQueue.wait(); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } + while (!Thread.currentThread().isInterrupted()) { + dispatchRequest(); + while (shouldProcess()) { + allocateTask(); } - processRequest(); } } - void processRequest() { - try { - TaskRequest request = taskRequestQueue.take(); - if (request instanceof AllocateTaskRequest) { - allocateTask((AllocateTaskRequest)request); + void dispatchRequest() { + try { + SchedulerRequest request = clientRequestQueue.take(); + if (request instanceof AllocateTaskRequest) { + taskRequestQueue.put((AllocateTaskRequest)request); + if (shouldPreempt()) { + maybePreempt((AllocateTaskRequest) request); } - else if (request instanceof DeallocateTaskRequest) { - deallocateTask((DeallocateTaskRequest)request); + } + else if (request instanceof DeallocateTaskRequest) { + deallocateTask((DeallocateTaskRequest)request); + } + else if (request instanceof DeallocateContainerRequest) { + preemptTask((DeallocateContainerRequest)request); + } + else { + LOG.error("Unknown task request message: " + request); + } } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } + + void maybePreempt(AllocateTaskRequest request) { + Priority priority = request.priority; + for (Map.Entry<Object, AllocatedTask> entry : taskAllocations.entrySet()) { + AllocatedTask allocatedTask = entry.getValue(); + Container container = allocatedTask.container; + if (priority.compareTo(allocatedTask.container.getPriority()) > 0) { + Object task = entry.getKey(); + ensureVertexDescendants(); + if (vertexDescendants.get(request.vertexIndex).get(allocatedTask.request.vertexIndex)) { + LOG.info("Preempting task/container for task/priority:" + task + "/" + container + + " for " + request.task + "/" + priority); + taskSchedulerContext.preemptContainer(allocatedTask.container.getId()); } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } catch (NullPointerException e) { - LOG.warn("Task request was badly constructed"); } + } } - void allocateTask(AllocateTaskRequest request) { - Container container = localContainerFactory.createContainer(request.capability, - request.priority); - taskAllocations.put(request.task, container); - taskSchedulerContext.taskAllocated(request.task, request.clientCookie, container); + void allocateTask() { + try { + AllocateTaskRequest request = taskRequestQueue.take(); + Container container = localContainerFactory.createContainer(request.capability, + request.priority); + taskAllocations.put(request.task, new AllocatedTask(request, container)); + taskSchedulerContext.taskAllocated(request.task, request.clientCookie, container); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } } void deallocateTask(DeallocateTaskRequest request) { - Container container = 
taskAllocations.remove(request.task); - if (container != null) { - taskSchedulerContext.containerBeingReleased(container.getId()); + AllocatedTask allocatedTask = taskAllocations.remove(request.task); + if (allocatedTask != null) { + taskSchedulerContext.containerBeingReleased(allocatedTask.container.getId()); } else { - boolean deallocationBeforeAllocation = false; - Iterator<TaskRequest> iter = taskRequestQueue.iterator(); + Iterator<AllocateTaskRequest> iter = taskRequestQueue.iterator(); while (iter.hasNext()) { TaskRequest taskRequest = iter.next(); - if (taskRequest instanceof AllocateTaskRequest && taskRequest.task.equals(request.task)) { + if (taskRequest.task.equals(request.task)) { iter.remove(); - deallocationBeforeAllocation = true; - LOG.info("deallcation happen before allocation for task:" + request.task); + LOG.info("Deallocation request before allocation for task:" + request.task); break; } } - if (!deallocationBeforeAllocation) { - throw new TezUncheckedException("Unable to find and remove task " + request.task + " from task allocations"); + } + } + + void preemptTask(DeallocateContainerRequest request) { + LOG.info("Trying to preempt: " + request.containerId); + Iterator<Map.Entry<Object, AllocatedTask>> entries = taskAllocations.entrySet().iterator(); + while (entries.hasNext()) { + Map.Entry<Object, AllocatedTask> entry = entries.next(); + Container container = entry.getValue().container; + if (container.getId().equals(request.containerId)) { + entries.remove(); + Object task = entry.getKey(); + LOG.info("Preempting task/container:" + task + "/" + container); + taskSchedulerContext.containerBeingReleased(container.getId()); } } } } + + @Override + public int getHeldContainersCount() { + return 0; + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImpl.java index 39000d6b0a..948f7df321 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImpl.java @@ -18,8 +18,8 @@ import java.nio.ByteBuffer; import java.util.List; import java.util.Map; +import java.util.Objects; -import com.google.common.base.Preconditions; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; @@ -28,6 +28,7 @@ import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.tez.common.ContainerSignatureMatcher; +import org.apache.tez.common.counters.DAGCounter; import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.api.UserPayload; import org.apache.tez.dag.app.AppContext; @@ -69,6 +70,16 @@ public void taskAllocated(Object task, Object appCookie, Container container) { taskSchedulerManager.taskAllocated(schedulerId, task, appCookie, container); } + @Override + public void containerAllocated(Container container) { + appContext.getCurrentDAG().incrementDagCounter(DAGCounter.TOTAL_CONTAINER_ALLOCATION_COUNT, 1); + } + + @Override + public void containerReused(Container container) { + appContext.getCurrentDAG().incrementDagCounter(DAGCounter.TOTAL_CONTAINER_REUSE_COUNT, 1); + } + @Override public void containerCompleted(Object taskLastAllocated, ContainerStatus containerStatus) { taskSchedulerManager.containerCompleted(schedulerId, taskLastAllocated, containerStatus); @@ -76,6 +87,7 @@ public void containerCompleted(Object taskLastAllocated, ContainerStatus 
contain @Override public void containerBeingReleased(ContainerId containerId) { + appContext.getCurrentDAG().incrementDagCounter(DAGCounter.TOTAL_CONTAINER_RELEASE_COUNT, 1); taskSchedulerManager.containerBeingReleased(schedulerId, containerId); } @@ -182,10 +194,15 @@ public AMState getAMState() { } } + @Override + public int getVertexIndexForTask(Object task) { + return taskSchedulerManager.getVertexIndexForTask(task); + } + @Override public void reportError(ServicePluginError servicePluginError, String diagnostics, DagInfo dagInfo) { - Preconditions.checkNotNull(servicePluginError, "ServicePluginError must be specified"); + Objects.requireNonNull(servicePluginError, "ServicePluginError must be specified"); taskSchedulerManager.reportError(schedulerId, servicePluginError, diagnostics, dagInfo); } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImplWrapper.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImplWrapper.java index 49ab77dcb8..304ea8ad7f 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImplWrapper.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerContextImplWrapper.java @@ -53,7 +53,7 @@ class TaskSchedulerContextImplWrapper implements TaskSchedulerContext { private TaskSchedulerContext real; private ExecutorService executorService; - + /** * @param real the actual TaskSchedulerAppCallback * @param executorService the ExecutorService to be used to send these events. @@ -70,6 +70,16 @@ public void taskAllocated(Object task, Object appCookie, Container container) { container)); } + @Override + public void containerAllocated(Container container) { + executorService.submit(new ContainerAllocatedCallable(real, container)); + } + + @Override + public void containerReused(Container container) { + executorService.submit(new ContainerReusedCallable(real, container)); + } + @Override public void containerCompleted(Object taskLastAllocated, ContainerStatus containerStatus) { @@ -90,7 +100,7 @@ public void nodesUpdated(List updatedNodes) { @Override public void appShutdownRequested() { - executorService.submit(new AppShudownRequestedCallable(real)); + executorService.submit(new AppShutdownRequestedCallable(real)); } @Override @@ -116,7 +126,7 @@ public float getProgress() { throw new TezUncheckedException(e); } } - + @Override public void preemptContainer(ContainerId containerId) { executorService.submit(new PreemptContainerCallable(real, containerId)); @@ -187,6 +197,11 @@ public AMState getAMState() { return real.getAMState(); } + @Override + public int getVertexIndexForTask(Object task) { + return real.getVertexIndexForTask(task); + } + // End of getters which do not need to go through a thread. Underlying implementation // does not use locks. 
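Note on the wrapper hunks above and below: TaskSchedulerContextImplWrapper turns every context callback into a Callable<Void> submitted to an ExecutorService, so scheduler plugins never invoke the real TaskSchedulerContext from arbitrary threads; the new containerAllocated/containerReused callables simply follow that existing pattern. A minimal self-contained sketch of the idiom, with illustrative names that are not the Tez API:

    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;

    interface Callback {
      void containerAllocated(String containerId);
    }

    class SerializedCallback implements Callback {
      private final Callback real;
      // A single-threaded executor serializes all calls into 'real'.
      private final ExecutorService executor = Executors.newSingleThreadExecutor();

      SerializedCallback(Callback real) {
        this.real = real;
      }

      @Override
      public void containerAllocated(final String containerId) {
        // Mirrors ContainerAllocatedCallable: capture the argument, defer the call.
        executor.submit(new Callable<Void>() {
          @Override
          public Void call() {
            real.containerAllocated(containerId);
            return null;
          }
        });
      }
    }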
@@ -221,6 +236,38 @@ public Void call() throws Exception { } } + static class ContainerAllocatedCallable extends TaskSchedulerContextCallbackBase + implements Callable { + private final Container container; + + ContainerAllocatedCallable(TaskSchedulerContext app, Container container) { + super(app); + this.container = container; + } + + @Override + public Void call() throws Exception { + app.containerAllocated(container); + return null; + } + } + + static class ContainerReusedCallable extends TaskSchedulerContextCallbackBase + implements Callable { + private final Container container; + + ContainerReusedCallable(TaskSchedulerContext app, Container container) { + super(app); + this.container = container; + } + + @Override + public Void call() throws Exception { + app.containerReused(container); + return null; + } + } + static class ContainerCompletedCallable extends TaskSchedulerContextCallbackBase implements Callable { @@ -275,10 +322,10 @@ public Void call() throws Exception { } } - static class AppShudownRequestedCallable extends TaskSchedulerContextCallbackBase + static class AppShutdownRequestedCallable extends TaskSchedulerContextCallbackBase implements Callable { - public AppShudownRequestedCallable(TaskSchedulerContext app) { + public AppShutdownRequestedCallable(TaskSchedulerContext app) { super(app); } @@ -341,19 +388,19 @@ public Void call() throws Exception { static class PreemptContainerCallable extends TaskSchedulerContextCallbackBase implements Callable { private final ContainerId containerId; - + public PreemptContainerCallable(TaskSchedulerContext app, ContainerId id) { super(app); this.containerId = id; } - + @Override public Void call() throws Exception { app.preemptContainer(containerId); return null; } } - + static class GetProgressCallable extends TaskSchedulerContextCallbackBase implements Callable { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerManager.java index 640e8f62a0..e311c23e86 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerManager.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/TaskSchedulerManager.java @@ -28,6 +28,7 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.Objects; import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; @@ -92,7 +93,7 @@ import org.apache.tez.hadoop.shim.HadoopShim; import org.apache.tez.hadoop.shim.HadoopShimsLoader; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; public class TaskSchedulerManager extends AbstractService implements @@ -139,6 +140,8 @@ public class TaskSchedulerManager extends AbstractService implements BlockingQueue eventQueue = new LinkedBlockingQueue(); + private final String yarnSchedulerClassName; + // Not tracking container / task to schedulerId. Instead relying on everything flowing through // the system and being propagated back via events. 
@@ -164,6 +167,8 @@ public TaskSchedulerManager(TaskScheduler taskScheduler, AppContext appContext, this.historyUrl = null; this.isLocalMode = false; this.hadoopShim = new HadoopShimsLoader(appContext.getAMConf()).getHadoopShim(); + this.yarnSchedulerClassName = appContext.getAMConf().get(TezConfiguration.TEZ_AM_YARN_SCHEDULER_CLASS, + TezConfiguration.TEZ_AM_YARN_SCHEDULER_CLASS_DEFAULT); } /** @@ -196,6 +201,8 @@ public TaskSchedulerManager(AppContext appContext, this.historyUrl = getHistoryUrl(); this.isLocalMode = isLocalMode; this.hadoopShim = hadoopShim; + this.yarnSchedulerClassName = appContext.getAMConf().get(TezConfiguration.TEZ_AM_YARN_SCHEDULER_CLASS, + TezConfiguration.TEZ_AM_YARN_SCHEDULER_CLASS_DEFAULT); this.appCallbackExecutor = createAppCallbackExecutorService(); if (this.webUI != null) { this.webUI.setHistoryUrl(this.historyUrl); @@ -217,9 +224,24 @@ public void setSignalled(boolean isSignalled) { } public int getNumClusterNodes() { + return getNumClusterNodes(false); + } + + public int getNumClusterNodes(boolean tryUpdate){ + if (cachedNodeCount == -1 && tryUpdate){ + cachedNodeCount = countAllNodes(); + } return cachedNodeCount; } - + + private int countAllNodes() { + try { + return taskSchedulers[0].getClusterNodeCount(); + } catch (Exception e) { + return handleTaskSchedulerExceptionWhileGettingNodeCount(e); + } + } + public Resource getAvailableResources(int schedulerId) { try { return taskSchedulers[schedulerId].getAvailableResources(); @@ -258,9 +280,7 @@ private ExecutorService createAppCallbackExecutorService() { } public synchronized void handleEvent(AMSchedulerEvent sEvent) { - if (LOG.isDebugEnabled()) { - LOG.debug("Processing the event " + sEvent.toString()); - } + LOG.debug("Processing the event {}", sEvent); switch (sEvent.getType()) { case S_TA_LAUNCH_REQUEST: handleTaLaunchRequest((AMSchedulerEventTALaunchRequest) sEvent); @@ -384,7 +404,7 @@ private void handleTAUnsuccessfulEnd(AMSchedulerEventTAEnded event) { String msg = "Error in TaskScheduler for handling Task De-allocation" + ", eventType=" + event.getType() + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext) - + ", taskAttemptId=" + attempt.getID(); + + ", taskAttemptId=" + attempt.getTaskAttemptID(); LOG.error(msg, e); sendEvent( new DAGAppMasterEventUserServiceFatalError( @@ -398,10 +418,10 @@ private void handleTAUnsuccessfulEnd(AMSchedulerEventTAEnded event) { ContainerId attemptContainerId = attempt.getAssignedContainerID(); if(!wasContainerAllocated) { - LOG.info("Task: " + attempt.getID() + + LOG.info("Task: " + attempt.getTaskAttemptID() + " has no container assignment in the scheduler"); if (attemptContainerId != null) { - LOG.error("No container allocated to task: " + attempt.getID() + LOG.error("No container allocated to task: " + attempt.getTaskAttemptID() + " according to scheduler. Task reported container id: " + attemptContainerId); } @@ -414,10 +434,14 @@ private void handleTAUnsuccessfulEnd(AMSchedulerEventTAEnded event) { // Inform the Node - the task has asked to be STOPPED / has already // stopped. // AMNodeImpl blacklisting logic does not account for KILLED attempts. - sendEvent(new AMNodeEventTaskAttemptEnded(appContext.getAllContainers(). 
- get(attemptContainerId).getContainer().getNodeId(), event.getSchedulerId(), - attemptContainerId, - attempt.getID(), event.getState() == TaskAttemptState.FAILED)); + AMContainer amContainer = appContext.getAllContainers().get(attemptContainerId); + // DAG can be shutting down so protect against container cleanup race + if (amContainer != null) { + Container container = amContainer.getContainer(); + sendEvent(new AMNodeEventTaskAttemptEnded(container.getNodeId(), event.getSchedulerId(), + attemptContainerId, + attempt.getTaskAttemptID(), event.getState() == TaskAttemptState.FAILED)); + } } } @@ -430,9 +454,14 @@ private void handleTASucceeded(AMSchedulerEventTAEnded event) { if (event.getUsedContainerId() != null) { sendEvent(new AMContainerEventTASucceeded(usedContainerId, event.getAttemptID())); - sendEvent(new AMNodeEventTaskAttemptSucceeded(appContext.getAllContainers(). - get(usedContainerId).getContainer().getNodeId(), event.getSchedulerId(), usedContainerId, - event.getAttemptID())); + AMContainer amContainer = appContext.getAllContainers().get(usedContainerId); + // DAG can be shutting down so protect against container cleanup race + if (amContainer != null) { + Container container = amContainer.getContainer(); + sendEvent(new AMNodeEventTaskAttemptSucceeded(container.getNodeId(), event.getSchedulerId(), + usedContainerId, + event.getAttemptID())); + } } boolean wasContainerAllocated = false; @@ -444,7 +473,7 @@ private void handleTASucceeded(AMSchedulerEventTAEnded event) { String msg = "Error in TaskScheduler for handling Task De-allocation" + ", eventType=" + event.getType() + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext) - + ", taskAttemptId=" + attempt.getID(); + + ", taskAttemptId=" + attempt.getTaskAttemptID(); LOG.error(msg, e); sendEvent( new DAGAppMasterEventUserServiceFatalError( @@ -454,7 +483,7 @@ private void handleTASucceeded(AMSchedulerEventTAEnded event) { } if (!wasContainerAllocated) { - LOG.error("De-allocated successful task: " + attempt.getID() + LOG.error("De-allocated successful task: " + attempt.getTaskAttemptID() + ", but TaskScheduler reported no container assigned to task"); } } @@ -468,15 +497,16 @@ private void handleTaLaunchRequest(AMSchedulerEventTALaunchRequest event) { TaskBasedLocationAffinity taskAffinity = locationHint.getAffinitizedTask(); if (taskAffinity != null) { Vertex vertex = appContext.getCurrentDAG().getVertex(taskAffinity.getVertexName()); - Preconditions.checkNotNull(vertex, "Invalid vertex in task based affinity " + taskAffinity - + " for attempt: " + taskAttempt.getID()); + Objects.requireNonNull(vertex, "Invalid vertex in task based affinity " + taskAffinity + + " for attempt: " + taskAttempt.getTaskAttemptID()); int taskIndex = taskAffinity.getTaskIndex(); Preconditions.checkState(taskIndex >=0 && taskIndex < vertex.getTotalTasks(), "Invalid taskIndex in task based affinity " + taskAffinity - + " for attempt: " + taskAttempt.getID()); + + " for attempt: " + taskAttempt.getTaskAttemptID()); TaskAttempt affinityAttempt = vertex.getTask(taskIndex).getSuccessfulAttempt(); if (affinityAttempt != null) { - Preconditions.checkNotNull(affinityAttempt.getAssignedContainerID(), affinityAttempt.getID()); + Objects.requireNonNull(affinityAttempt.getAssignedContainerID(), + affinityAttempt.getTaskAttemptID() == null ? 
null : affinityAttempt.getTaskAttemptID().toString()); try { taskSchedulers[event.getSchedulerId()].allocateTask(taskAttempt, event.getCapability(), @@ -488,7 +518,7 @@ private void handleTaLaunchRequest(AMSchedulerEventTALaunchRequest event) { String msg = "Error in TaskScheduler for handling Task Allocation" + ", eventType=" + event.getType() + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext) - + ", taskAttemptId=" + taskAttempt.getID(); + + ", taskAttemptId=" + taskAttempt.getTaskAttemptID(); LOG.error(msg, e); sendEvent( new DAGAppMasterEventUserServiceFatalError( @@ -498,7 +528,7 @@ private void handleTaLaunchRequest(AMSchedulerEventTALaunchRequest event) { return; } LOG.info("No attempt for task affinity to " + taskAffinity + " for attempt " - + taskAttempt.getID() + " Ignoring."); + + taskAttempt.getTaskAttemptID() + " Ignoring."); // fall through with null hosts/racks } else { hosts = (locationHint.getHosts() != null) ? locationHint @@ -521,7 +551,7 @@ private void handleTaLaunchRequest(AMSchedulerEventTALaunchRequest event) { String msg = "Error in TaskScheduler for handling Task Allocation" + ", eventType=" + event.getType() + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext) - + ", taskAttemptId=" + taskAttempt.getID(); + + ", taskAttemptId=" + taskAttempt.getTaskAttemptID(); LOG.error(msg, e); sendEvent( new DAGAppMasterEventUserServiceFatalError( @@ -537,7 +567,7 @@ private void handleTAStateUpdated(AMSchedulerEventTAStateUpdated event) { String msg = "Error in TaskScheduler for handling Task State Update" + ", eventType=" + event.getType() + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(event.getSchedulerId(), appContext) - + ", taskAttemptId=" + event.getTaskAttempt().getID() + + ", taskAttemptId=" + event.getTaskAttempt().getTaskAttemptID() + ", state=" + event.getState(); LOG.error(msg, e); sendEvent( @@ -574,9 +604,11 @@ TaskSchedulerContext wrapTaskSchedulerContext(TaskSchedulerContext rawContext) { @VisibleForTesting TaskScheduler createYarnTaskScheduler(TaskSchedulerContext taskSchedulerContext, - int schedulerId) { - LOG.info("Creating TaskScheduler: YarnTaskSchedulerService"); - return new YarnTaskSchedulerService(taskSchedulerContext); + int schedulerId) throws TezException { + LOG.info("Creating YARN TaskScheduler: {}", yarnSchedulerClassName); + return ReflectionUtils.createClazzInstance(yarnSchedulerClassName, + new Class[] { TaskSchedulerContext.class }, + new Object[] { taskSchedulerContext }); } @VisibleForTesting @@ -624,7 +656,9 @@ protected void instantiateSchedulers(String host, int port, String trackingUrl, @Override public synchronized void serviceStart() throws Exception { - InetSocketAddress serviceAddr = clientService.getBindAddress(); + // clientService is null in case of LocalDAGAppMaster + InetSocketAddress serviceAddr = clientService == null ? new InetSocketAddress("127.0.0.1", 0) + : clientService.getBindAddress(); dagAppMaster = appContext.getAppMaster(); // if web service is enabled then set tracking url. else disable it (value = ""). 
// the actual url set on the rm web ui will be the proxy url set by WebAppProxyServlet, which @@ -684,12 +718,14 @@ protected void notifyForTest() { public void initiateStop() { for (int i = 0 ; i < taskSchedulers.length ; i++) { - try { - taskSchedulers[i].getTaskScheduler().initiateStop(); - } catch (Exception e) { - // Ignore for now as scheduler stop invoked on shutdown - LOG.error("Failed to do a clean initiateStop for Scheduler: " - + Utils.getTaskSchedulerIdentifierString(i, appContext), e); + if (taskSchedulers[i] != null) { + try { + taskSchedulers[i].getTaskScheduler().initiateStop(); + } catch (Exception e) { + // Ignore for now as scheduler stop invoked on shutdown + LOG.error("Failed to do a clean initiateStop for Scheduler: " + + Utils.getTaskSchedulerIdentifierString(i, appContext), e); + } } } } @@ -725,19 +761,24 @@ public synchronized void taskAllocated(int schedulerId, Object task, sendEvent(new AMNodeEventContainerAllocated(container .getNodeId(), schedulerId, container.getId())); } - + appContext.getCurrentDAG().addUsedContainer(container); TaskAttempt taskAttempt = event.getTaskAttempt(); // TODO - perhaps check if the task still needs this container // because the deallocateTask downcall may have raced with the // taskAllocated() upcall assert task.equals(taskAttempt); - - if (appContext.getAllContainers().get(containerId).getState() == AMContainerState.ALLOCATED) { - sendEvent(new AMContainerEventLaunchRequest(containerId, taskAttempt.getVertexID(), - event.getContainerContext(), event.getLauncherId(), event.getTaskCommId())); + + AMContainer amContainer = appContext.getAllContainers().get(containerId); + // Even though we just added this container, + // DAG can be shutting down so protect against container cleanup race + if (amContainer != null) { + if (amContainer.getState() == AMContainerState.ALLOCATED) { + sendEvent(new AMContainerEventLaunchRequest(containerId, taskAttempt.getVertexID(), + event.getContainerContext(), event.getLauncherId(), event.getTaskCommId())); + } } - sendEvent(new AMContainerEventAssignTA(containerId, taskAttempt.getID(), + sendEvent(new AMContainerEventAssignTA(containerId, taskAttempt.getTaskAttemptID(), event.getRemoteTaskSpec(), event.getContainerContext().getLocalResources(), event .getContainerContext().getCredentials(), event.getPriority())); } @@ -835,9 +876,7 @@ public AppFinalStatus getFinalAppStatus() { } } } - if(LOG.isDebugEnabled()) { - LOG.debug("Setting job diagnostics to " + sb.toString()); - } + LOG.debug("Setting job diagnostics to {}", sb); // if history url is set use the same, if historyUrl is set to "" then rm ui disables the // history url @@ -859,19 +898,7 @@ public float getProgress(int schedulerId) { // Doubles as a mechanism to update node counts periodically. Hence schedulerId required. // TODO Handle this in TEZ-2124. Need a way to know which scheduler is calling in. 
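The hunk below replaces the inline node-count fetch in getProgress() with countAllNodes(), pairing with the new getNumClusterNodes(boolean tryUpdate) above: the count is fetched lazily from the first task scheduler and cached, with -1 as the never-fetched sentinel. The pattern in isolation, as a sketch with hypothetical names:

    import java.util.function.IntSupplier;

    class CachedNodeCount {
      private int cachedNodeCount = -1; // -1 = not fetched yet
      private final IntSupplier clusterNodeCount;

      CachedNodeCount(IntSupplier clusterNodeCount) {
        this.clusterNodeCount = clusterNodeCount;
      }

      int get(boolean tryUpdate) {
        // Callers that tolerate a stale value pass tryUpdate=false and
        // never pay for the scheduler round trip.
        if (cachedNodeCount == -1 && tryUpdate) {
          cachedNodeCount = clusterNodeCount.getAsInt();
        }
        return cachedNodeCount;
      }
    }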
- int nodeCount = 0; - try { - nodeCount = taskSchedulers[0].getClusterNodeCount(); - } catch (Exception e) { - String msg = "Error in TaskScheduler while getting node count" - + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(schedulerId, appContext); - LOG.error(msg, e); - sendEvent( - new DAGAppMasterEventUserServiceFatalError( - DAGAppMasterEventType.TASK_SCHEDULER_SERVICE_FATAL_ERROR, - msg, e)); - throw new RuntimeException(e); - } + int nodeCount = countAllNodes(); if (nodeCount != cachedNodeCount) { cachedNodeCount = nodeCount; sendEvent(new AMNodeEventNodeCountUpdated(cachedNodeCount, schedulerId)); @@ -879,18 +906,28 @@ public float getProgress(int schedulerId) { return dagAppMaster.getProgress(); } + private int handleTaskSchedulerExceptionWhileGettingNodeCount(Exception e) { + String msg = "Error in TaskScheduler while getting node count" + + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(0, appContext); + LOG.error(msg, e); + sendEvent( + new DAGAppMasterEventUserServiceFatalError( + DAGAppMasterEventType.TASK_SCHEDULER_SERVICE_FATAL_ERROR, + msg, e)); + throw new RuntimeException(e); + } + public void reportError(int taskSchedulerIndex, ServicePluginError servicePluginError, String diagnostics, DagInfo dagInfo) { if (servicePluginError == YarnTaskSchedulerServiceError.RESOURCEMANAGER_ERROR) { LOG.info("Error reported by scheduler {} - {}", - Utils.getTaskSchedulerIdentifierString(taskSchedulerIndex, appContext) + ": " + - diagnostics); - if (taskSchedulerDescriptors[taskSchedulerIndex].getClassName() - .equals(YarnTaskSchedulerService.class.getName())) { + Utils.getTaskSchedulerIdentifierString(taskSchedulerIndex, appContext), diagnostics); + if (taskSchedulerDescriptors[taskSchedulerIndex].getEntityName() + .equals(TezConstants.getTezYarnServicePluginName())) { LOG.warn( "Reporting a SchedulerServiceError to the DAGAppMaster since the error" + - " was reported by the default YARN Task Scheduler"); + " was reported by the YARN task scheduler"); sendEvent(new DAGAppMasterEventSchedulingServiceError(diagnostics)); } } else if (servicePluginError.getErrorType() == ServicePluginError.ErrorType.PERMANENT) { @@ -912,7 +949,7 @@ public void reportError(int taskSchedulerIndex, ServicePluginError servicePlugin } public void dagCompleted() { - for (int i = 0 ; i < taskSchedulers.length ; i++) { + for (int i = 0; i < taskSchedulers.length; i++) { try { taskSchedulers[i].dagComplete(); } catch (Exception e) { @@ -927,17 +964,33 @@ public void dagCompleted() { } } + public int getHeldContainersCount() { + int count = 0; + for (TaskSchedulerWrapper taskScheduler : taskSchedulers) { + count += taskScheduler.getTaskScheduler().getHeldContainersCount(); + } + return count; + } + public void dagSubmitted() { // Nothing to do right now. Indicates that a new DAG has been submitted and // the context has updated information. } + public int getVertexIndexForTask(Object task) { + TaskAttempt attempt = (TaskAttempt) task; + return attempt.getVertexID().getId(); + } + public void preemptContainer(int schedulerId, ContainerId containerId) { // TODO Why is this making a call back into the scheduler, when the call is originating from there. 
// An AMContainer instance should already exist if an attempt is being made to preempt it AMContainer amContainer = appContext.getAllContainers().get(containerId); try { - taskSchedulers[amContainer.getTaskSchedulerIdentifier()].deallocateContainer(containerId); + // DAG can be shutting down so protect against container cleanup race + if (amContainer != null) { + taskSchedulers[amContainer.getTaskSchedulerIdentifier()].deallocateContainer(containerId); + } } catch (Exception e) { String msg = "Error in TaskScheduler when preempting container" + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(amContainer.getTaskSchedulerIdentifier(), appContext) @@ -978,13 +1031,21 @@ public ContainerSignatureMatcher getContainerSignatureMatcher() { } public boolean hasUnregistered() { + // Only return true if all task schedulers that were registered successfully unregister + if (taskSchedulers.length == 0) { + return false; + } boolean result = true; - for (int i = 0 ; i < taskSchedulers.length ; i++) { + for (int i = 0; i < taskSchedulers.length; i++) { // Explicitly not catching any exceptions around this API // No clear route to recover. Better to crash. + if (taskSchedulers[i] == null) { + return false; + } try { result = result & this.taskSchedulers[i].hasUnregistered(); } catch (Exception e) { + result = false; String msg = "Error in TaskScheduler when checking if a scheduler has unregistered" + ", scheduler=" + Utils.getTaskSchedulerIdentifierString(i, appContext); LOG.error(msg, e); @@ -1039,4 +1100,8 @@ public String getTaskSchedulerClassName(int taskSchedulerIndex) { return taskSchedulers[taskSchedulerIndex].getTaskScheduler().getClass().getName(); } + @VisibleForTesting + public TaskScheduler getTaskScheduler(int taskSchedulerIndex) { + return taskSchedulers[taskSchedulerIndex].getTaskScheduler(); + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java index 95cd85be18..b299324721 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/YarnTaskSchedulerService.java @@ -32,6 +32,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.PriorityBlockingQueue; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -44,7 +45,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.commons.lang.exception.ExceptionUtils; -import org.apache.commons.math3.random.RandomDataGenerator; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ExitUtil; @@ -63,12 +63,13 @@ import org.apache.hadoop.yarn.util.RackResolver; import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.tez.serviceplugins.api.TaskAttemptEndReason; +import org.apache.tez.dag.app.dag.TaskAttempt; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.common.ContainerSignatureMatcher; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -118,7 +119,7 @@ public class 
YarnTaskSchedulerService extends TaskScheduler new HashMap(); Set priorityHasAffinity = Sets.newHashSet(); - + Set blacklistedNodes = Collections .newSetFromMap(new ConcurrentHashMap()); @@ -152,7 +153,6 @@ public class YarnTaskSchedulerService extends TaskScheduler Set sessionMinHeldContainers = Sets.newHashSet(); - RandomDataGenerator random = new RandomDataGenerator(); private final Configuration conf; @VisibleForTesting @@ -254,7 +254,8 @@ public YarnTaskSchedulerService(TaskSchedulerContext taskSchedulerContext) { @Override public Resource getAvailableResources() { - return amRmClient.getAvailableResources(); + Resource resource = amRmClient.getAvailableResources(); + return resource == null ? Resource.newInstance(0, 0) : resource; } @Override @@ -448,10 +449,8 @@ public void onContainersCompleted(List statuses) { // being released // completion of a container we had released earlier // an allocated container completed. notify app - if (LOG.isDebugEnabled()) { - LOG.debug("Released container completed:" + completedId + - " last allocated to task: " + task); - } + LOG.debug("Released container completed:{} last allocated to task: {}", + completedId, task); appContainerStatus.put(task, containerStatus); continue; } @@ -492,6 +491,8 @@ public void onContainersCompleted(List statuses) { @Override public void onContainersAllocated(List containers) { + super.onContainersAllocated(containers); + if (isStopStarted.get()) { LOG.info("Ignoring container allocations because application is shutting down. Num " + containers.size()); @@ -594,7 +595,7 @@ long getHeldContainerExpireTime(long startTime) { long expireTime = (startTime + idleContainerTimeoutMin); if (idleContainerTimeoutMin != -1 && idleContainerTimeoutMin < idleContainerTimeoutMax) { long expireTimeMax = startTime + idleContainerTimeoutMax; - expireTime = random.nextLong(expireTime, expireTimeMax); + expireTime = ThreadLocalRandom.current().nextLong(expireTime, expireTimeMax); } return expireTime; @@ -641,7 +642,7 @@ long getHeldContainerExpireTime(long startTime) { long currentTime = System.currentTimeMillis(); boolean releaseContainer = false; - if (isNew || (heldContainer.getContainerExpiryTime() <= currentTime + if (isNew || (heldContainer.getContainerExpiryTime() - currentTime <= 0 && idleContainerTimeoutMin != -1)) { // container idle timeout has expired or is a new unused container. // new container is possibly a spurious race condition allocation. @@ -774,7 +775,7 @@ long getHeldContainerExpireTime(long startTime) { // if we are not being able to assign containers to pending tasks then // we cannot avoid releasing containers. Or else we may not be able to // get new containers from YARN to match the pending request - if (!isNew && heldContainer.getContainerExpiryTime() <= currentTime + if (!isNew && heldContainer.getContainerExpiryTime() - currentTime <= 0 && idleContainerTimeoutMin != -1) { LOG.info("Container's idle timeout expired. 
Releasing container" + ", containerId=" + heldContainer.container.getId() @@ -1168,7 +1169,7 @@ boolean preemptIfNeeded() { ContainerId[] preemptedContainers = null; int numPendingRequestsToService = 0; synchronized (this) { - Resource freeResources = amRmClient.getAvailableResources(); + Resource freeResources = this.getAvailableResources(); if (LOG.isDebugEnabled()) { LOG.debug(constructPreemptionPeriodicLog(freeResources)); } else { @@ -1215,12 +1216,9 @@ boolean preemptIfNeeded() { if(!preemptionWaitDeadlineCrossed && fitsIn(highestPriRequest.getCapability(), freeResources)) { - if (LOG.isDebugEnabled()) { - LOG.debug(highestPriRequest + " fits in free resources"); - } else { - if (numHeartbeats % 50 == 1) { - LOG.info(highestPriRequest + " fits in free resources"); - } + LOG.debug("{} fits in free resources", highestPriRequest); + if (numHeartbeats % 50 == 1) { + LOG.info(highestPriRequest + " fits in free resources"); } return true; } @@ -1277,7 +1275,7 @@ boolean preemptIfNeeded() { + numHighestPriRequests + " pending requests at pri: " + highestPriRequest.getPriority()); } - + int newContainersReleased = 0; for (int i=0; i= nextScheduleTs) { + if (currentTs - nextScheduleTs >= 0) { Map assignedContainers = null; synchronized(YarnTaskSchedulerService.this) { // Remove the container and try scheduling it. @@ -2084,10 +2088,7 @@ private void doAssignAll() { // honor reuse-locality flags (container not timed out yet), Don't queue // (already in queue), don't release (release happens when containers // time-out) - if (LOG.isDebugEnabled()) { - LOG.debug("Trying to assign all delayed containers to newly received" - + " tasks"); - } + LOG.debug("Trying to assign all delayed containers to newly received tasks"); Iterator iter = delayedContainers.iterator(); while(iter.hasNext()) { HeldContainer delayedContainer = iter.next(); @@ -2165,6 +2166,13 @@ void addDelayedContainer(Container container, } } + void removeDelayedContainer(HeldContainer container) { + synchronized(this) { + if (delayedContainers.remove(container)) { + LOG.debug("Removed {} from delayed containers", container.getContainer().getId()); + } + } + } } synchronized void determineMinHeldContainers() { @@ -2406,4 +2414,9 @@ public String toString() { : "null"); } } + + @Override + public int getHeldContainersCount() { + return heldContainers.size(); + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java index 92e5817243..c3aae35910 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerEventLaunchRequest.java @@ -41,7 +41,7 @@ public AMContainerEventLaunchRequest(ContainerId containerId, } public TezDAGID getDAGId() { - return this.vertexId.getDAGId(); + return this.vertexId.getDAGID(); } public TezVertexID getVertexId() { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerHelpers.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerHelpers.java index ee322655af..a0407c0a43 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerHelpers.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerHelpers.java @@ -25,7 +25,6 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.TreeMap; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; @@ -48,7 +47,6 @@ import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.security.TokenCache; import org.apache.tez.dag.api.TezConfiguration; -import org.apache.tez.dag.api.TezConstants; import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.app.AppContext; import org.apache.tez.dag.records.TezDAGID; @@ -56,18 +54,20 @@ import com.google.common.annotations.VisibleForTesting; -public class AMContainerHelpers { +public final class AMContainerHelpers { private static final Logger LOG = LoggerFactory.getLogger(AMContainerHelpers.class); - private static Object commonContainerSpecLock = new Object(); + private static final Object COMMON_CONTAINER_SPEC_LOCK = new Object(); private static TezDAGID lastDAGID = null; - private static Map commonContainerSpecs = - new HashMap(); + private static final Map COMMON_CONTAINER_SPECS = + new HashMap<>(); + + private AMContainerHelpers() {} public static void dagComplete(TezDAGID dagId) { - synchronized (commonContainerSpecLock) { - commonContainerSpecs.remove(dagId); + synchronized (COMMON_CONTAINER_SPEC_LOCK) { + COMMON_CONTAINER_SPECS.remove(dagId); } } @@ -89,24 +89,21 @@ public static LocalResource createLocalResource(FileSystem fc, Path file, /** * Create the common {@link ContainerLaunchContext} for all attempts. - * - * @param applicationACLs - * @param auxiliaryService */ private static ContainerLaunchContext createCommonContainerLaunchContext( Map applicationACLs, Credentials credentials, String auxiliaryService) { // Application environment - Map environment = new HashMap(); + Map environment = new HashMap<>(); // Service data - Map serviceData = new HashMap(); + Map serviceData = new HashMap<>(); // Tokens // Setup up task credentials buffer - ByteBuffer containerCredentialsBuffer = ByteBuffer.wrap(new byte[] {}); + ByteBuffer containerCredentialsBuffer; try { Credentials containerCredentials = new Credentials(); @@ -126,9 +123,7 @@ private static ContainerLaunchContext createCommonContainerLaunchContext( containerTokens_dob.getLength()); // Add shuffle token - if (LOG.isDebugEnabled()) { - LOG.debug("Putting shuffle token in serviceData in common CLC"); - } + LOG.debug("Putting shuffle token in serviceData in common CLC"); serviceData.put(auxiliaryService, TezCommonUtils.serializeServiceData(TokenCache.getSessionToken(containerCredentials))); } catch (IOException e) { @@ -137,10 +132,8 @@ private static ContainerLaunchContext createCommonContainerLaunchContext( // Construct the actual Container // The null fields are per-container and will be constructed for each // container separately. 
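Worth calling out in the createContainerLaunchContext hunk that follows: the common launch context is built once per DAG, cached in COMMON_CONTAINER_SPECS under COMMON_CONTAINER_SPEC_LOCK, and the previous DAG's entry is evicted as soon as a new DAG arrives, so at most one spec is retained. The same keep-only-the-latest cache in isolation, a sketch rather than the Tez code:

    import java.util.HashMap;
    import java.util.Map;
    import java.util.function.Function;

    final class LatestKeyCache<K, V> {
      private final Map<K, V> cache = new HashMap<>();
      private K lastKey;

      synchronized V get(K key, Function<K, V> builder) {
        V value = cache.computeIfAbsent(key, builder); // build once per key
        if (lastKey != null && !lastKey.equals(key)) {
          cache.remove(lastKey); // drop the previous DAG's spec
        }
        lastKey = key;
        return value;
      }
    }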
- ContainerLaunchContext container = - ContainerLaunchContext.newInstance(null, environment, null, - serviceData, containerCredentialsBuffer, applicationACLs); - return container; + return ContainerLaunchContext.newInstance(null, environment, null, + serviceData, containerCredentialsBuffer, applicationACLs); } @VisibleForTesting @@ -155,14 +148,14 @@ public static ContainerLaunchContext createContainerLaunchContext( AppContext appContext, Resource containerResource, Configuration conf, String auxiliaryService) { - ContainerLaunchContext commonContainerSpec = null; - synchronized (commonContainerSpecLock) { - if (!commonContainerSpecs.containsKey(tezDAGID)) { + ContainerLaunchContext commonContainerSpec; + synchronized (COMMON_CONTAINER_SPEC_LOCK) { + if (!COMMON_CONTAINER_SPECS.containsKey(tezDAGID)) { commonContainerSpec = createCommonContainerLaunchContext(acls, credentials, auxiliaryService); - commonContainerSpecs.put(tezDAGID, commonContainerSpec); + COMMON_CONTAINER_SPECS.put(tezDAGID, commonContainerSpec); } else { - commonContainerSpec = commonContainerSpecs.get(tezDAGID); + commonContainerSpec = COMMON_CONTAINER_SPECS.get(tezDAGID); } // Ensure that we remove container specs for previous AMs to reduce @@ -170,14 +163,14 @@ public static ContainerLaunchContext createContainerLaunchContext( if (lastDAGID == null) { lastDAGID = tezDAGID; } else if (!lastDAGID.equals(tezDAGID)) { - commonContainerSpecs.remove(lastDAGID); + COMMON_CONTAINER_SPECS.remove(lastDAGID); lastDAGID = tezDAGID; } } // Setup environment by cloning from common env. Map env = commonContainerSpec.getEnvironment(); - Map myEnv = new HashMap(env.size()); + Map myEnv = new HashMap<>(env.size()); myEnv.putAll(env); myEnv.putAll(vertexEnv); @@ -199,17 +192,15 @@ public static ContainerLaunchContext createContainerLaunchContext( appContext.getApplicationAttemptId().getAttemptId(), modifiedJavaOpts); // Duplicate the ByteBuffers for access by multiple containers. 
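On the "Duplicate the ByteBuffers" comment above: duplicate() gives each container its own position and limit over the same backing bytes, so one consumer reading the token buffer cannot exhaust it for the next. A quick runnable check of that semantics:

    import java.nio.ByteBuffer;

    public class DuplicateDemo {
      public static void main(String[] args) {
        ByteBuffer shared = ByteBuffer.wrap(new byte[] {1, 2, 3, 4});
        ByteBuffer forContainerA = shared.duplicate();
        ByteBuffer forContainerB = shared.duplicate();
        forContainerA.getInt();                        // consumes A's view only
        System.out.println(forContainerB.remaining()); // prints 4: B is untouched
      }
    }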
- Map myServiceData = new HashMap(); + Map myServiceData = new HashMap<>(); for (Entry entry : commonContainerSpec.getServiceData() .entrySet()) { myServiceData.put(entry.getKey(), entry.getValue().duplicate()); } // Construct the actual Container - ContainerLaunchContext container = - ContainerLaunchContext.newInstance(localResources, myEnv, commands, - myServiceData, commonContainerSpec.getTokens().duplicate(), acls); - return container; + return ContainerLaunchContext.newInstance(localResources, myEnv, commands, + myServiceData, commonContainerSpec.getTokens().duplicate(), acls); } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java index 02243b8aca..6b67eb9da1 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerImpl.java @@ -73,7 +73,7 @@ import org.apache.tez.dag.records.TezTaskAttemptID; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; @SuppressWarnings("rawtypes") public class AMContainerImpl implements AMContainer { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerTask.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerTask.java index 7b22ba6416..cfc143a402 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerTask.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/AMContainerTask.java @@ -19,8 +19,8 @@ package org.apache.tez.dag.app.rm.container; import java.util.Map; +import java.util.Objects; -import com.google.common.base.Preconditions; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.tez.runtime.api.impl.TaskSpec; @@ -35,7 +35,7 @@ public class AMContainerTask { public AMContainerTask(TaskSpec tezTask, Map additionalResources, Credentials credentials, boolean credentialsChanged, int priority) { - Preconditions.checkNotNull(tezTask, "TaskSpec cannot be null"); + Objects.requireNonNull(tezTask, "TaskSpec cannot be null"); this.tezTask = tezTask; this.additionalResources = additionalResources; this.credentials = credentials; diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/ContainerContextMatcher.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/ContainerContextMatcher.java index 436f098427..f9c57c8843 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/ContainerContextMatcher.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/container/ContainerContextMatcher.java @@ -21,18 +21,19 @@ import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; +import java.util.Objects; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.tez.dag.app.ContainerContext; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.apache.tez.common.ContainerSignatureMatcher; public class ContainerContextMatcher implements ContainerSignatureMatcher { private void checkArguments(Object cs1, Object cs2) { - Preconditions.checkNotNull(cs1, "Arguments cannot be null"); - Preconditions.checkNotNull(cs2, "Arguments cannot be null"); + Objects.requireNonNull(cs1, "Arguments cannot be null"); + Objects.requireNonNull(cs2, "Arguments cannot be null"); 
Preconditions.checkArgument(cs1 instanceof ContainerContext && cs2 instanceof ContainerContext, "Container context can only compare instances of " @@ -62,8 +63,8 @@ public boolean isExactMatch(Object cs1, Object cs2) { @Override public Map getAdditionalResources(Map lr1, Map lr2) { - Preconditions.checkNotNull(lr1); - Preconditions.checkNotNull(lr2); + Objects.requireNonNull(lr1); + Objects.requireNonNull(lr2); Map c2LocalResources = new HashMap(lr2); for (Entry c1LocalResource : lr1.entrySet()) { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeEvent.java index 1a975b042c..d9e249af4e 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeEvent.java @@ -25,15 +25,24 @@ public class AMNodeEvent extends AbstractEvent { private final NodeId nodeId; private final int schedulerId; + private final ExtendedNodeId amNodeId; public AMNodeEvent(NodeId nodeId, int schedulerId, AMNodeEventType type) { super(type); this.nodeId = nodeId; this.schedulerId = schedulerId; + this.amNodeId = null; + } + + public AMNodeEvent(ExtendedNodeId amNodeId, int schedulerId, AMNodeEventType type) { + super(type); + this.nodeId = null; + this.schedulerId = schedulerId; + this.amNodeId = amNodeId; } public NodeId getNodeId() { - return this.nodeId; + return amNodeId == null ? this.nodeId : this.amNodeId; } public int getSchedulerId() { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeEventType.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeEventType.java index a14112486a..3dd7a6a9f8 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeEventType.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeEventType.java @@ -28,14 +28,14 @@ public enum AMNodeEventType { //Producer: TaskSchedulerEventHandler N_TA_SUCCEEDED, - // Producer: TaskSchedulerEventHnadler, Task(retroactive failure) + // Producer: TaskSchedulerEventHandler, Task(retroactive failure) N_TA_ENDED, - + //Producer: TaskScheduler via TaskSchedulerEventHandler N_TURNED_UNHEALTHY, N_TURNED_HEALTHY, N_NODE_COUNT_UPDATED, // for blacklisting. - + //Producer: AMNodeManager N_IGNORE_BLACKLISTING_ENABLED, N_IGNORE_BLACKLISTING_DISABLED, diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java index f4ad032514..26796d2af4 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeImpl.java @@ -304,7 +304,7 @@ protected static class TaskAttemptFailedTransition implements @Override public AMNodeState transition(AMNodeImpl node, AMNodeEvent nEvent) { AMNodeEventTaskAttemptEnded event = (AMNodeEventTaskAttemptEnded) nEvent; - LOG.info("Attempt " + (event.failed() ? "failed" : "killed") + "on node: " + node.getNodeId() + LOG.info("Attempt " + (event.failed() ? 
"failed" : "killed") + " on node: " + node.getNodeId() + " TA: " + event.getTaskAttemptId() + ", container: " + event.getContainerId() + ", numFailedTAs: " + node.numFailedTAs); @@ -509,4 +509,10 @@ public void dagComplete(DAG dag) { this.writeLock.unlock(); } } + + public String toString() { + return String.format( + "{AMNodeImpl: nodeId: %s, state: %s, containers: %d, completed containers: %d, healthy: %s, blackListed: %s}", + nodeId, getState(), getContainers().size(), completedContainers.size(), !isUnhealthy(), isBlacklisted()); + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeTracker.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeTracker.java index 1536170fac..8c81cb52c9 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeTracker.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/AMNodeTracker.java @@ -134,6 +134,17 @@ public int getNumNodes(int schedulerId) { return perSourceNodeTrackers.get(schedulerId).getNumNodes(); } + /** + * Retrieve the number of nodes in ACTIVE state. This number is suitable for deciding + * how many nodes can be potentially used for running containers at the moment. + * + * @param schedulerId the schedulerId for which the node count is required + * @return the number of nodes from the scheduler being in ACTIVE state + */ + public int getNumActiveNodes(int schedulerId) { + return perSourceNodeTrackers.get(schedulerId).getNumActiveNodes(); + } + @Private @VisibleForTesting public boolean isBlacklistingIgnored(int schedulerId) { @@ -158,6 +169,4 @@ private PerSourceNodeTracker getAndCreateIfNeededPerSourceTracker(int schedulerI } return nodeTracker; } - - } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/ExtendedNodeId.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/ExtendedNodeId.java new file mode 100644 index 0000000000..07b2dd4252 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/ExtendedNodeId.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.app.rm.node; + +import java.util.Objects; + +import org.apache.hadoop.yarn.api.records.NodeId; + +/** + * ExtendedNodeId extends NodeId with unique identifier in addition to hostname and port. + */ +public class ExtendedNodeId extends NodeId { + private NodeId nodeId; + private String host; + private int port; + private final String uniqueIdentifier; + + public ExtendedNodeId(NodeId nodeId, String uniqueIdentifier) { + this.nodeId = Objects.requireNonNull(nodeId); + this.uniqueIdentifier = uniqueIdentifier == null ? 
"" : uniqueIdentifier.trim(); + } + + @Override + public String getHost() { + return nodeId.getHost(); + } + + @Override + protected void setHost(final String host) { + this.host = host; + build(); + } + + @Override + public int getPort() { + return nodeId.getPort(); + } + + @Override + protected void setPort(final int port) { + this.port = port; + build(); + } + + @Override + protected void build() { + this.nodeId = NodeId.newInstance(host, port); + } + + @Override + public String toString() { + if (!uniqueIdentifier.isEmpty()) { + return super.toString() + ":" + uniqueIdentifier; + } + return super.toString(); + } + + @Override + public int hashCode() { + if (!uniqueIdentifier.isEmpty()) { + return super.hashCode() + 31 * uniqueIdentifier.hashCode(); + } + return super.hashCode(); + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } else if (obj == null) { + return false; + } else if (this.getClass() != obj.getClass()) { + return false; + } else { + ExtendedNodeId amNodeId = (ExtendedNodeId) obj; + return super.equals(obj) && Objects.equals(uniqueIdentifier, amNodeId.uniqueIdentifier); + } + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/PerSourceNodeTracker.java b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/PerSourceNodeTracker.java index 74c6176e4c..9906644fe1 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/PerSourceNodeTracker.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/rm/node/PerSourceNodeTracker.java @@ -84,6 +84,10 @@ public int getNumNodes() { return nodeMap.size(); } + public int getNumActiveNodes() { + return (int) nodeMap.values().stream().filter(node -> node.getState() == AMNodeState.ACTIVE).count(); + } + public void handle(AMNodeEvent rEvent) { // No synchronization required until there's multiple dispatchers. NodeId nodeId = rEvent.getNodeId(); diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/web/AMWebController.java b/tez-dag/src/main/java/org/apache/tez/dag/app/web/AMWebController.java index 2115dac247..00cd26e2ce 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/web/AMWebController.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/web/AMWebController.java @@ -144,9 +144,7 @@ public void setCorsHeaders() { URL url = new URL(historyUrlBase); origin = url.getProtocol() + "://" + url.getAuthority(); } catch (MalformedURLException e) { - if (LOG.isDebugEnabled()) { - LOG.debug("Invalid url set for tez history url base: " + historyUrlBase, e); - } + LOG.debug("Invalid url set for tez history url base: {}", historyUrlBase, e); } } @@ -161,9 +159,7 @@ public void setCorsHeaders() { } void sendErrorResponse(int sc, String msg, Exception e) { - if (LOG.isDebugEnabled()) { - LOG.debug(msg, e); - } + LOG.debug(msg, e); try { response().sendError(sc, msg); @@ -764,7 +760,7 @@ public void getTasksInfo() { ArrayList> tasksInfo = new ArrayList>(); for(Task t : tasks) { Map taskInfo = new HashMap(); - taskInfo.put("id", t.getTaskId().toString()); + taskInfo.put("id", t.getTaskID().toString()); taskInfo.put("progress", Float.toString(t.getProgress())); taskInfo.put("status", t.getState().toString()); @@ -814,7 +810,7 @@ else if(!attemptIDs.isEmpty()) { } TaskAttempt attempt = task. 
- getAttempt(TezTaskAttemptID.getInstance(task.getTaskId(), indexes.get(2))); + getAttempt(TezTaskAttemptID.getInstance(task.getTaskID(), indexes.get(2))); if(attempt == null) { continue; } @@ -862,7 +858,7 @@ public void getAttemptsInfo() { ArrayList> attemptsInfo = new ArrayList>(); for(TaskAttempt a : attempts) { Map attemptInfo = new HashMap(); - attemptInfo.put("id", a.getID().toString()); + attemptInfo.put("id", a.getTaskAttemptID().toString()); attemptInfo.put("progress", Float.toString(a.getProgress())); attemptInfo.put("status", a.getState().toString()); diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/web/WebUIService.java b/tez-dag/src/main/java/org/apache/tez/dag/app/web/WebUIService.java index 1ac178b9f4..da7e66d037 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/web/WebUIService.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/web/WebUIService.java @@ -20,9 +20,20 @@ import static org.apache.hadoop.yarn.util.StringHelper.pajoin; +import java.io.IOException; import java.net.InetSocketAddress; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import org.apache.tez.common.Preconditions; +import org.apache.tez.common.web.ProfileServlet; +import org.apache.tez.common.web.ServletToControllerAdapters.ConfServletController; +import org.apache.tez.common.web.ServletToControllerAdapters.JMXJsonServletController; +import org.apache.tez.common.web.ServletToControllerAdapters.StackServletController; +import org.apache.tez.common.web.ServletToControllerAdapters.ProfileServletController; +import org.apache.tez.common.web.ServletToControllerAdapters.ProfileOutputServletController; -import com.google.common.base.Preconditions; import com.google.inject.name.Names; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,6 +43,7 @@ import org.apache.hadoop.yarn.webapp.WebApp; import org.apache.hadoop.yarn.webapp.WebApps; import org.apache.hadoop.yarn.webapp.YarnWebParams; +import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.app.AppContext; @@ -51,6 +63,7 @@ public class WebUIService extends AbstractService { private final AppContext context; private TezAMWebApp tezAMWebApp; private WebApp webApp; + private String baseUrl = ""; //url without paths, like http://host:port private String trackingUrl = ""; private String historyUrl = ""; @@ -88,9 +101,16 @@ protected void serviceStart() throws Exception { // certificates, however AM user is not trusted. 
// ideally the withHttpPolicy should be used, however hadoop 2.2 does not have the api conf.set("yarn.http.policy", "HTTP_ONLY"); + if (conf.get(TezConfiguration.TEZ_AM_WEBSERVICE_PORT_RANGE) == null) { + conf.set(TezConfiguration.TEZ_AM_WEBSERVICE_PORT_RANGE, + TezConfiguration.TEZ_AM_WEBSERVICE_PORT_RANGE_DEFAULT); + LOG.info( + "Using default port range for WebUIService: " + conf.get(TezConfiguration.TEZ_AM_WEBSERVICE_PORT_RANGE)); + } this.webApp = WebApps .$for(this.tezAMWebApp) .with(conf) + .withPortRange(conf, TezConfiguration.TEZ_AM_WEBSERVICE_PORT_RANGE) .start(this.tezAMWebApp); InetSocketAddress address = webApp.getListenerAddress(); if (address != null) { @@ -105,7 +125,8 @@ protected void serviceStart() throws Exception { LOG.warn("Failed to resolve canonical hostname for " + context.getAppMaster().getAppNMHost()); } - trackingUrl = "http://" + hostname + ":" + port + "/ui/"; + baseUrl = "http://" + hostname + ":" + port; + trackingUrl = baseUrl + "/ui/"; LOG.info("Instantiated WebUIService at " + trackingUrl); } } catch (Exception e) { @@ -119,14 +140,16 @@ protected void serviceStart() throws Exception { @Override protected void serviceStop() throws Exception { if (this.webApp != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Stopping WebApp"); - } + LOG.debug("Stopping WebApp"); this.webApp.stop(); } super.serviceStop(); } + public String getBaseUrl() { + return baseUrl; + } + public String getTrackingURL() { return trackingUrl; } @@ -216,6 +239,22 @@ public void setup() { "getTasksInfo"); route(WS_PREFIX_V2 + pajoin("attemptsInfo", ATTEMPT_ID, DAG_ID), AMWebController.class, "getAttemptsInfo"); + route("/jmx", JMXJsonServletController.class); + route("/conf", ConfServletController.class); + route("/stacks", StackServletController.class); + final String asyncProfilerHome = ProfileServlet.getAsyncProfilerHome(); + if (asyncProfilerHome != null && !asyncProfilerHome.trim().isEmpty()) { + Path tmpDir = Paths.get(ProfileServlet.OUTPUT_DIR); + try { + Files.createDirectories(tmpDir); + route("/prof", ProfileServletController.class); + route("/prof-output", ProfileOutputServletController.class); + } catch (IOException e) { + LOG.info("Could not create directory for profiler output: {}. Disabling /prof endpoint...", tmpDir); + } + } else { + LOG.info("ASYNC_PROFILER_HOME env or -Dasync.profiler.home not specified.
Disabling /prof endpoint.."); + } } } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/DAGHistoryEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/DAGHistoryEvent.java index dfa6bbdf91..240d8bd96d 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/DAGHistoryEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/DAGHistoryEvent.java @@ -18,9 +18,10 @@ package org.apache.tez.dag.history; +import org.apache.tez.dag.records.DAGIDAware; import org.apache.tez.dag.records.TezDAGID; -public class DAGHistoryEvent { +public class DAGHistoryEvent implements DAGIDAware { private final HistoryEvent historyEvent; private final TezDAGID dagID; @@ -39,7 +40,8 @@ public HistoryEvent getHistoryEvent() { return historyEvent; } - public TezDAGID getDagID() { + @Override + public TezDAGID getDAGID() { return this.dagID; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/HistoryEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/HistoryEvent.java index 1ca0d5f069..5b077e98ec 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/HistoryEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/HistoryEvent.java @@ -18,9 +18,10 @@ package org.apache.tez.dag.history; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; + import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; public interface HistoryEvent { @@ -30,8 +31,8 @@ public interface HistoryEvent { public boolean isHistoryEvent(); - public void toProtoStream(OutputStream outputStream) throws IOException; + public void toProtoStream(CodedOutputStream outputStream) throws IOException; - public void fromProtoStream(InputStream inputStream) throws IOException; + public void fromProtoStream(CodedInputStream inputStream) throws IOException; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/HistoryEventHandler.java b/tez-dag/src/main/java/org/apache/tez/dag/history/HistoryEventHandler.java index 4fa1926ae9..f4dd789fd9 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/HistoryEventHandler.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/HistoryEventHandler.java @@ -124,7 +124,7 @@ public void serviceStop() throws Exception { * @throws IOException */ public void handleCriticalEvent(DAGHistoryEvent event) throws IOException { - TezDAGID dagId = event.getDagID(); + TezDAGID dagId = event.getDAGID(); String dagIdStr = "N/A"; if(dagId != null) { dagIdStr = dagId.toString(); @@ -161,7 +161,7 @@ public void handleCriticalEvent(DAGHistoryEvent event) throws IOException { } private boolean shouldLogEvent(DAGHistoryEvent event) { - TezDAGID dagId = event.getDagID(); + TezDAGID dagId = event.getDAGID(); HistoryLogLevel dagLogLevel = null; if (dagId != null) { @@ -207,7 +207,7 @@ private boolean shouldLogTaskAttemptEvents(DAGHistoryEvent event, HistoryLogLeve if (dagLogLevel == HistoryLogLevel.TASK_ATTEMPT && (eventType == HistoryEventType.TASK_ATTEMPT_STARTED || eventType == HistoryEventType.TASK_ATTEMPT_FINISHED)) { - TezDAGID dagId = event.getDagID(); + TezDAGID dagId = event.getDAGID(); Set filters = null; if (dagId != null) { filters = dagIdToTaskAttemptFilters.get(dagId); diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/AMLaunchedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/AMLaunchedEvent.java index fa332d68eb..bb87fdb1fc 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/AMLaunchedEvent.java +++ 
b/tez-dag/src/main/java/org/apache/tez/dag/history/events/AMLaunchedEvent.java @@ -19,9 +19,10 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.tez.dag.history.HistoryEvent; @@ -84,13 +85,13 @@ public void fromProto(AMLaunchedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - AMLaunchedProto proto = AMLaunchedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + AMLaunchedProto proto = inputStream.readMessage(AMLaunchedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/AMStartedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/AMStartedEvent.java index 8a59d84769..6be528af96 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/AMStartedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/AMStartedEvent.java @@ -19,9 +19,10 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.tez.dag.history.HistoryEvent; @@ -79,13 +80,13 @@ public void fromProto(AMStartedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - AMStartedProto proto = AMStartedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + AMStartedProto proto = inputStream.readMessage(AMStartedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/AppLaunchedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/AppLaunchedEvent.java index 08d2aff021..0b812f0184 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/AppLaunchedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/AppLaunchedEvent.java @@ -19,9 +19,9 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.tez.common.VersionInfo; @@ -67,12 +67,12 @@ public boolean isHistoryEvent() { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { + public void toProtoStream(CodedOutputStream outputStream) throws IOException { throw new UnsupportedOperationException("Not a recovery event"); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { + public void fromProtoStream(CodedInputStream inputStream) throws IOException { throw new UnsupportedOperationException("Not a recovery event"); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/ContainerLaunchedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/ContainerLaunchedEvent.java index 45d0261adf..9ec05926bb 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/ContainerLaunchedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/ContainerLaunchedEvent.java @@ -19,9 +19,10 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.util.ConverterUtils; @@ -77,14 +78,14 @@ public void fromProto(ContainerLaunchedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { + public void fromProtoStream(CodedInputStream inputStream) throws IOException { ContainerLaunchedProto proto = - ContainerLaunchedProto.parseDelimitedFrom(inputStream); + inputStream.readMessage(ContainerLaunchedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/ContainerStoppedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/ContainerStoppedEvent.java index 86971ce520..9360e49643 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/ContainerStoppedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/ContainerStoppedEvent.java @@ -19,9 +19,10 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.util.ConverterUtils; @@ -82,14 +83,14 @@ public void fromProto(ContainerStoppedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { + public void fromProtoStream(CodedInputStream 
inputStream) throws IOException { ContainerStoppedProto proto = - ContainerStoppedProto.parseDelimitedFrom(inputStream); + inputStream.readMessage(ContainerStoppedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGCommitStartedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGCommitStartedEvent.java index 016bb60b09..694d9474ff 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGCommitStartedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGCommitStartedEvent.java @@ -19,9 +19,11 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; import org.apache.tez.dag.history.SummaryEvent; @@ -69,13 +71,14 @@ public void fromProto(DAGCommitStartedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - DAGCommitStartedProto proto = DAGCommitStartedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + DAGCommitStartedProto proto = + inputStream.readMessage(DAGCommitStartedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGFinishedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGFinishedEvent.java index c395297965..3364e1d2b2 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGFinishedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGFinishedEvent.java @@ -19,10 +19,12 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.util.Map; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.DagTypeConverters; @@ -31,6 +33,7 @@ import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; import org.apache.tez.dag.history.SummaryEvent; +import org.apache.tez.dag.records.DAGIDAware; import org.apache.tez.dag.records.TezDAGID; import org.apache.tez.dag.recovery.records.RecoveryProtos; import org.apache.tez.dag.recovery.records.RecoveryProtos.DAGFinishedProto; @@ -39,7 +42,7 @@ import com.google.common.primitives.Ints; import com.google.protobuf.ByteString; -public class DAGFinishedEvent implements HistoryEvent, SummaryEvent { +public class DAGFinishedEvent implements HistoryEvent, SummaryEvent, DAGIDAware { private TezDAGID dagID; private long startTime; @@ -121,13 +124,13 @@ public void fromProto(DAGFinishedProto proto) { } @Override - public void 
toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - DAGFinishedProto proto = DAGFinishedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + DAGFinishedProto proto = inputStream.readMessage(DAGFinishedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ -178,7 +181,8 @@ public DAGState getState() { return state; } - public TezDAGID getDagID() { + @Override + public TezDAGID getDAGID() { return dagID; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGInitializedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGInitializedEvent.java index 98d64d3da8..31a53f714a 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGInitializedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGInitializedEvent.java @@ -19,17 +19,20 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.util.Map; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; +import org.apache.tez.dag.records.DAGIDAware; import org.apache.tez.dag.records.TezDAGID; import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.dag.recovery.records.RecoveryProtos; +import org.apache.tez.dag.recovery.records.RecoveryProtos.DAGInitializedProto; -public class DAGInitializedEvent implements HistoryEvent { +public class DAGInitializedEvent implements HistoryEvent, DAGIDAware { private TezDAGID dagID; private long initTime; @@ -83,14 +86,14 @@ public void fromProto(RecoveryProtos.DAGInitializedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - RecoveryProtos.DAGInitializedProto proto = - RecoveryProtos.DAGInitializedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + DAGInitializedProto proto = + inputStream.readMessage(DAGInitializedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ -101,7 +104,8 @@ public long getInitTime() { return this.initTime; } - public TezDAGID getDagID() { + @Override + public TezDAGID getDAGID() { return dagID; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGKillRequestEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGKillRequestEvent.java index 525e361364..b9e3da845c 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGKillRequestEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGKillRequestEvent.java @@ -18,14 +18,17 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import 
java.io.InputStream; import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; import org.apache.tez.dag.history.SummaryEvent; import org.apache.tez.dag.records.TezDAGID; import org.apache.tez.dag.recovery.records.RecoveryProtos; +import org.apache.tez.dag.recovery.records.RecoveryProtos.DAGKillRequestProto; import org.apache.tez.dag.recovery.records.RecoveryProtos.SummaryEventProto; import org.apache.tez.dag.utils.ProtoUtils; @@ -60,12 +63,12 @@ public boolean isHistoryEvent() { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } - public RecoveryProtos.DAGKillRequestProto toProto() { - return RecoveryProtos.DAGKillRequestProto.newBuilder() + public DAGKillRequestProto toProto() { + return DAGKillRequestProto.newBuilder() .setDagId(dagID.toString()) .setKillRequestTime(killRequestTime) .setIsSessionStopped(isSessionStopped) @@ -73,9 +76,9 @@ public RecoveryProtos.DAGKillRequestProto toProto() { } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - RecoveryProtos.DAGKillRequestProto proto = - RecoveryProtos.DAGKillRequestProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + DAGKillRequestProto proto = + inputStream.readMessage(DAGKillRequestProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGRecoveredEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGRecoveredEvent.java index 2bfa43b894..e5f5614819 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGRecoveredEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGRecoveredEvent.java @@ -19,9 +19,9 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.tez.dag.app.dag.DAGState; import org.apache.tez.dag.history.HistoryEvent; @@ -76,13 +76,13 @@ public boolean isHistoryEvent() { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { + public void toProtoStream(CodedOutputStream outputStream) throws IOException { throw new UnsupportedOperationException("Invalid operation for eventType " + getEventType().name()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { + public void fromProtoStream(CodedInputStream inputStream) throws IOException { throw new UnsupportedOperationException("Invalid operation for eventType " + getEventType().name()); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGStartedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGStartedEvent.java index d0e0e693a5..d49ad3706f 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGStartedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGStartedEvent.java @@ 
-19,16 +19,18 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.dag.app.dag.DAGState; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; +import org.apache.tez.dag.records.DAGIDAware; import org.apache.tez.dag.records.TezDAGID; import org.apache.tez.dag.recovery.records.RecoveryProtos.DAGStartedProto; -public class DAGStartedEvent implements HistoryEvent { +public class DAGStartedEvent implements HistoryEvent, DAGIDAware { private TezDAGID dagID; private long startTime; @@ -74,13 +76,13 @@ public void fromProto(DAGStartedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - DAGStartedProto proto = DAGStartedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + DAGStartedProto proto = inputStream.readMessage(DAGStartedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ -97,7 +99,8 @@ public long getStartTime() { return this.startTime; } - public TezDAGID getDagID() { + @Override + public TezDAGID getDAGID() { return dagID; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGSubmittedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGSubmittedEvent.java index 1b1fdf38a9..ac9b923303 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGSubmittedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/DAGSubmittedEvent.java @@ -19,10 +19,13 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.util.Map; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; +import org.apache.tez.dag.records.DAGIDAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -41,7 +44,7 @@ import org.apache.tez.dag.utils.ProtoUtils; -public class DAGSubmittedEvent implements HistoryEvent, SummaryEvent { +public class DAGSubmittedEvent implements HistoryEvent, SummaryEvent, DAGIDAware { private static final Logger LOG = LoggerFactory.getLogger(DAGSubmittedEvent.class); @@ -126,13 +129,13 @@ public void fromProto(DAGSubmittedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - DAGSubmittedProto proto = DAGSubmittedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + DAGSubmittedProto proto = inputStream.readMessage(DAGSubmittedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new 
IOException("No data found in stream"); } @@ -173,7 +176,8 @@ public DAGProtos.DAGPlan getDAGPlan() { return this.dagPlan; } - public TezDAGID getDagID() { + @Override + public TezDAGID getDAGID() { return dagID; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptFinishedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptFinishedEvent.java index e9100e8ab9..41b0ed6643 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptFinishedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptFinishedEvent.java @@ -20,11 +20,15 @@ import javax.annotation.Nullable; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.util.List; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.common.TezConverterUtils; +import org.apache.tez.common.counters.CounterGroup; +import org.apache.tez.common.counters.TezCounter; +import org.apache.tez.dag.records.TaskAttemptIDAware; import org.apache.tez.runtime.api.TaskFailureType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,7 +52,7 @@ import org.apache.tez.dag.recovery.records.RecoveryProtos.TezEventProto; import org.apache.tez.runtime.api.impl.TezEvent; -public class TaskAttemptFinishedEvent implements HistoryEvent { +public class TaskAttemptFinishedEvent implements HistoryEvent, TaskAttemptIDAware { private static final Logger LOG = LoggerFactory.getLogger(TaskAttemptFinishedEvent.class); @@ -226,14 +230,14 @@ public void fromProto(TaskAttemptFinishedProto proto) throws IOException { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { + public void fromProtoStream(CodedInputStream inputStream) throws IOException { TaskAttemptFinishedProto proto = - TaskAttemptFinishedProto.parseDelimitedFrom(inputStream); + inputStream.readMessage(TaskAttemptFinishedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ -242,29 +246,71 @@ public void fromProtoStream(InputStream inputStream) throws IOException { @Override public String toString() { - String counterStr = ""; + StringBuilder sb = new StringBuilder(); + sb.append("vertexName="); + sb.append(vertexName); + sb.append(", taskAttemptId="); + sb.append(taskAttemptId); + sb.append(", creationTime="); + sb.append(creationTime); + sb.append(", allocationTime="); + sb.append(allocationTime); + sb.append(", startTime="); + sb.append(startTime); + sb.append(", finishTime="); + sb.append(finishTime); + sb.append(", timeTaken="); + sb.append(finishTime - startTime); + sb.append(", status="); + sb.append(state.name()); + + if (taskFailureType != null) { + sb.append(", taskFailureType="); + sb.append(taskFailureType); + } + if (error != null) { + sb.append(", errorEnum="); + sb.append(error); + } + if (diagnostics != null) { + sb.append(", diagnostics="); + sb.append(diagnostics); + } + if (containerId != null) { + sb.append(", containerId="); + sb.append(containerId); + } + if (nodeId != null) { + sb.append(", nodeId="); + sb.append(nodeId); + } + if (nodeHttpAddress != null) { + 
sb.append(", nodeHttpAddress="); + sb.append(nodeHttpAddress); + } + if (state != TaskAttemptState.SUCCEEDED) { - counterStr = ", counters=" + ( tezCounters == null ? "null" : - tezCounters.toString() - .replaceAll("\\n", ", ").replaceAll("\\s+", " ")); + sb.append(", counters="); + if (tezCounters == null) { + sb.append("null"); + } else { + sb.append("Counters: "); + sb.append(tezCounters.countCounters()); + for (CounterGroup group : tezCounters) { + sb.append(", "); + sb.append(group.getDisplayName()); + for (TezCounter counter : group) { + sb.append(", "); + sb.append(counter.getDisplayName()).append("=") + .append(counter.getValue()); + } + } + } } - return "vertexName=" + vertexName - + ", taskAttemptId=" + taskAttemptId - + ", creationTime=" + creationTime - + ", allocationTime=" + allocationTime - + ", startTime=" + startTime - + ", finishTime=" + finishTime - + ", timeTaken=" + (finishTime - startTime) - + ", status=" + state.name() - + (taskFailureType != null ? ", taskFailureType=" + taskFailureType : "") - + (error != null ? ", errorEnum=" + error.name() : "") - + (diagnostics != null ? ", diagnostics=" + diagnostics : "") - + (containerId != null ? ", containerId=" + containerId.toString() : "") - + (nodeId != null ? ", nodeId=" + nodeId.toString() : "") - + (nodeHttpAddress != null ? ", nodeHttpAddress=" + nodeHttpAddress : "") - + counterStr; + return sb.toString(); } + @Override public TezTaskAttemptID getTaskAttemptID() { return taskAttemptId; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptStartedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptStartedEvent.java index 71d4419ee7..1442ff4b43 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptStartedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskAttemptStartedEvent.java @@ -19,18 +19,20 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; +import org.apache.tez.dag.records.TaskAttemptIDAware; import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.dag.recovery.records.RecoveryProtos.TaskAttemptStartedProto; -public class TaskAttemptStartedEvent implements HistoryEvent { +public class TaskAttemptStartedEvent implements HistoryEvent, TaskAttemptIDAware { private TezTaskAttemptID taskAttemptId; private String inProgressLogsUrl; @@ -91,13 +93,14 @@ public void fromProto(TaskAttemptStartedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - TaskAttemptStartedProto proto = TaskAttemptStartedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + TaskAttemptStartedProto proto = + inputStream.readMessage(TaskAttemptStartedProto.PARSER, 
ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ -113,6 +116,7 @@ public String toString() { + ", nodeId=" + nodeId; } + @Override public TezTaskAttemptID getTaskAttemptID() { return this.taskAttemptId; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskFinishedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskFinishedEvent.java index 71ff6c84b7..fd0a5fc6aa 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskFinishedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskFinishedEvent.java @@ -19,9 +19,13 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; +import org.apache.tez.common.counters.CounterGroup; +import org.apache.tez.common.counters.TezCounter; +import org.apache.tez.dag.records.TaskIDAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.tez.common.counters.TezCounters; @@ -32,7 +36,7 @@ import org.apache.tez.dag.records.TezTaskID; import org.apache.tez.dag.recovery.records.RecoveryProtos.TaskFinishedProto; -public class TaskFinishedEvent implements HistoryEvent { +public class TaskFinishedEvent implements HistoryEvent, TaskIDAware { private static final Logger LOG = LoggerFactory.getLogger(TaskFinishedEvent.class); @@ -107,13 +111,13 @@ public void fromProto(TaskFinishedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - TaskFinishedProto proto = TaskFinishedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + TaskFinishedProto proto = inputStream.readMessage(TaskFinishedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ -122,20 +126,43 @@ public void fromProtoStream(InputStream inputStream) throws IOException { @Override public String toString() { - return "vertexName=" + vertexName - + ", taskId=" + taskID - + ", startTime=" + startTime - + ", finishTime=" + finishTime - + ", timeTaken=" + (finishTime - startTime) - + ", status=" + state.name() - + ", successfulAttemptID=" + (successfulAttemptID == null ? "null" : - successfulAttemptID.toString()) - + ", diagnostics=" + diagnostics - + ", counters=" + ( tezCounters == null ? 
"null" : - tezCounters.toString() - .replaceAll("\\n", ", ").replaceAll("\\s+", " ")); + StringBuilder sb = new StringBuilder(); + sb.append("vertexName="); + sb.append(vertexName); + sb.append(", taskId="); + sb.append(taskID); + sb.append(", startTime="); + sb.append(startTime); + sb.append(", finishTime="); + sb.append(finishTime); + sb.append(", timeTaken="); + sb.append(finishTime - startTime); + sb.append(", status="); + sb.append(state.name()); + sb.append(", successfulAttemptID="); + sb.append(successfulAttemptID); + sb.append(", diagnostics="); + sb.append(diagnostics); + sb.append(", counters="); + if (tezCounters == null) { + sb.append("null"); + } else { + sb.append("Counters: "); + sb.append(tezCounters.countCounters()); + for (CounterGroup group : tezCounters) { + sb.append(", "); + sb.append(group.getDisplayName()); + for (TezCounter counter : group) { + sb.append(", "); + sb.append(counter.getDisplayName()).append("=") + .append(counter.getValue()); + } + } + } + return sb.toString(); } + @Override public TezTaskID getTaskID() { return taskID; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskStartedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskStartedEvent.java index 7516090a7c..1379e04e19 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskStartedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/TaskStartedEvent.java @@ -19,27 +19,37 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; +import org.apache.tez.dag.api.oldrecords.TaskState; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; +import org.apache.tez.dag.records.TaskIDAware; import org.apache.tez.dag.records.TezTaskID; import org.apache.tez.dag.recovery.records.RecoveryProtos.TaskStartedProto; -public class TaskStartedEvent implements HistoryEvent { +public class TaskStartedEvent implements HistoryEvent, TaskIDAware { private TezTaskID taskID; private String vertexName; private long scheduledTime; private long startTime; + private TaskState state; public TaskStartedEvent(TezTaskID taskId, String vertexName, long scheduledTime, long startTime) { + this(taskId, vertexName, scheduledTime, startTime, TaskState.SCHEDULED); + } + + public TaskStartedEvent(TezTaskID taskId, + String vertexName, long scheduledTime, long startTime, TaskState state) { this.vertexName = vertexName; this.taskID = taskId; this.scheduledTime = scheduledTime; this.startTime = startTime; + this.state = state; } public TaskStartedEvent() { @@ -75,13 +85,13 @@ public void fromProto(TaskStartedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - TaskStartedProto proto = TaskStartedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + TaskStartedProto proto = inputStream.readMessage(TaskStartedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ 
-96,6 +106,7 @@ public String toString() { + ", launchTime=" + startTime; } + @Override public TezTaskID getTaskID() { return taskID; } @@ -108,4 +119,7 @@ public long getStartTime() { return startTime; } + public TaskState getState() { + return state; + } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexCommitStartedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexCommitStartedEvent.java index c4521870e6..cb05bdd60b 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexCommitStartedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexCommitStartedEvent.java @@ -19,10 +19,12 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.nio.charset.Charset; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; import org.apache.tez.dag.history.SummaryEvent; @@ -73,13 +75,14 @@ public void fromProto(VertexCommitStartedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - VertexCommitStartedProto proto = VertexCommitStartedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + VertexCommitStartedProto proto = + inputStream.readMessage(VertexCommitStartedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ -98,7 +101,7 @@ public TezVertexID getVertexID() { @Override public void toSummaryProtoStream(OutputStream outputStream) throws IOException { SummaryEventProto.Builder builder = RecoveryProtos.SummaryEventProto.newBuilder() - .setDagId(vertexID.getDAGId().toString()) + .setDagId(vertexID.getDAGID().toString()) .setTimestamp(commitStartTime) .setEventType(getEventType().ordinal()) .setEventPayload( diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexConfigurationDoneEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexConfigurationDoneEvent.java index 137342cf21..d5a1ce5b2d 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexConfigurationDoneEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexConfigurationDoneEvent.java @@ -18,18 +18,20 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.dag.api.DagTypeConverters; import org.apache.tez.dag.api.EdgeProperty; import org.apache.tez.dag.api.VertexLocationHint; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; import org.apache.tez.dag.records.TezVertexID; +import org.apache.tez.dag.records.VertexIDAware; import 
org.apache.tez.dag.recovery.records.RecoveryProtos.EdgeManagerDescriptorProto; import org.apache.tez.dag.recovery.records.RecoveryProtos.RootInputSpecUpdateProto; import org.apache.tez.dag.recovery.records.RecoveryProtos.VertexConfigurationDoneProto; @@ -37,7 +39,7 @@ import com.google.common.collect.Maps; -public class VertexConfigurationDoneEvent implements HistoryEvent { +public class VertexConfigurationDoneEvent implements HistoryEvent, VertexIDAware { private TezVertexID vertexID; private long reconfigureDoneTime; @@ -155,13 +157,14 @@ public void fromProto(VertexConfigurationDoneProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - VertexConfigurationDoneProto proto = VertexConfigurationDoneProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + VertexConfigurationDoneProto proto = + inputStream.readMessage(VertexConfigurationDoneProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ -182,6 +185,7 @@ public String toString() { + ", setParallelismCalledFlag=" + setParallelismCalledFlag; } + @Override public TezVertexID getVertexID() { return this.vertexID; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexFinishedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexFinishedEvent.java index a2cdae2de6..4288a2f81a 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexFinishedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexFinishedEvent.java @@ -19,11 +19,14 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.util.Map; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.dag.app.dag.impl.ServicePluginInfo; +import org.apache.tez.dag.records.VertexIDAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.tez.common.counters.TezCounters; @@ -38,7 +41,7 @@ import org.apache.tez.dag.recovery.records.RecoveryProtos.VertexFinishStateProto; import org.apache.tez.dag.recovery.records.RecoveryProtos.VertexFinishedProto; -public class VertexFinishedEvent implements HistoryEvent, SummaryEvent { +public class VertexFinishedEvent implements HistoryEvent, SummaryEvent, VertexIDAware { private static final Logger LOG = LoggerFactory.getLogger(VertexFinishedEvent.class); @@ -123,13 +126,14 @@ public void fromProto(VertexFinishedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - VertexFinishedProto proto = VertexFinishedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + VertexFinishedProto proto = + inputStream.readMessage(VertexFinishedProto.PARSER, 
ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ -156,6 +160,7 @@ public String toString() { + (servicePluginInfo != null ? servicePluginInfo : "null"); } + @Override public TezVertexID getVertexID() { return this.vertexID; } @@ -206,7 +211,7 @@ public void toSummaryProtoStream(OutputStream outputStream) throws IOException { .build(); SummaryEventProto.Builder builder = RecoveryProtos.SummaryEventProto.newBuilder() - .setDagId(vertexID.getDAGId().toString()) + .setDagId(vertexID.getDAGID().toString()) .setTimestamp(finishTime) .setEventType(getEventType().ordinal()) .setEventPayload(finishStateProto.toByteString()); diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexGroupCommitFinishedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexGroupCommitFinishedEvent.java index ec8f3e1a4f..7b3d0d4463 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexGroupCommitFinishedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexGroupCommitFinishedEvent.java @@ -19,10 +19,12 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.util.Collection; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; import org.apache.tez.dag.history.SummaryEvent; @@ -94,13 +96,14 @@ public TezVertexID apply(String input) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - VertexGroupCommitFinishedProto proto = VertexGroupCommitFinishedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + VertexGroupCommitFinishedProto proto = + inputStream.readMessage(VertexGroupCommitFinishedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexGroupCommitStartedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexGroupCommitStartedEvent.java index 3de355cc64..d615debeb3 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexGroupCommitStartedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexGroupCommitStartedEvent.java @@ -19,10 +19,12 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.util.Collection; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; import org.apache.tez.dag.history.SummaryEvent; @@ -94,13 +96,14 @@ public TezVertexID apply(String input) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws 
IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - VertexGroupCommitStartedProto proto = VertexGroupCommitStartedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + VertexGroupCommitStartedProto proto = + inputStream.readMessage(VertexGroupCommitStartedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexInitializedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexInitializedEvent.java index 90099fce22..285c52001e 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexInitializedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexInitializedEvent.java @@ -19,12 +19,13 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.dag.api.DagTypeConverters; import org.apache.tez.dag.api.InputDescriptor; import org.apache.tez.dag.api.InputInitializerDescriptor; @@ -35,6 +36,7 @@ import org.apache.tez.dag.history.HistoryEventType; import org.apache.tez.dag.history.utils.TezEventUtils; import org.apache.tez.dag.records.TezVertexID; +import org.apache.tez.dag.records.VertexIDAware; import org.apache.tez.dag.recovery.records.RecoveryProtos; import org.apache.tez.dag.recovery.records.RecoveryProtos.TezEventProto; import org.apache.tez.dag.recovery.records.RecoveryProtos.VertexInitializedProto; @@ -42,7 +44,7 @@ import com.google.common.collect.Lists; -public class VertexInitializedEvent implements HistoryEvent { +public class VertexInitializedEvent implements HistoryEvent, VertexIDAware { private TezVertexID vertexID; private String vertexName; @@ -151,14 +153,14 @@ public void fromProto(RecoveryProtos.VertexInitializedProto proto) throws IOExce } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - RecoveryProtos.VertexInitializedProto proto = - RecoveryProtos.VertexInitializedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + VertexInitializedProto proto = + inputStream.readMessage(VertexInitializedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ -181,8 +183,9 @@ public String toString() { + (servicePluginInfo != null ? 
servicePluginInfo : "null"); } + @Override public TezVertexID getVertexID() { - return this.vertexID; + return vertexID; } public long getInitRequestedTime() { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexStartedEvent.java b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexStartedEvent.java index a8bd21eacf..53f00977a7 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexStartedEvent.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/events/VertexStartedEvent.java @@ -19,16 +19,18 @@ package org.apache.tez.dag.history.events; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import com.google.protobuf.ExtensionRegistry; import org.apache.tez.dag.app.dag.VertexState; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.HistoryEventType; import org.apache.tez.dag.records.TezVertexID; +import org.apache.tez.dag.records.VertexIDAware; import org.apache.tez.dag.recovery.records.RecoveryProtos.VertexStartedProto; -public class VertexStartedEvent implements HistoryEvent { +public class VertexStartedEvent implements HistoryEvent, VertexIDAware { private TezVertexID vertexID; private long startRequestedTime; @@ -74,13 +76,13 @@ public void fromProto(VertexStartedProto proto) { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { - toProto().writeDelimitedTo(outputStream); + public void toProtoStream(CodedOutputStream outputStream) throws IOException { + outputStream.writeMessageNoTag(toProto()); } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { - VertexStartedProto proto = VertexStartedProto.parseDelimitedFrom(inputStream); + public void fromProtoStream(CodedInputStream inputStream) throws IOException { + VertexStartedProto proto = inputStream.readMessage(VertexStartedProto.PARSER, ExtensionRegistry.getEmptyRegistry()); if (proto == null) { throw new IOException("No data found in stream"); } @@ -94,8 +96,9 @@ public String toString() { + ", startedTime=" + startTime; } + @Override public TezVertexID getVertexID() { - return this.vertexID; + return vertexID; } public long getStartRequestedTime() { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/HistoryEventJsonConversion.java b/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/HistoryEventJsonConversion.java index e60575f38e..5cc940fc4e 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/HistoryEventJsonConversion.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/HistoryEventJsonConversion.java @@ -51,14 +51,16 @@ import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; -public class HistoryEventJsonConversion { +public final class HistoryEventJsonConversion { + + private HistoryEventJsonConversion() {} public static JSONObject convertToJson(HistoryEvent historyEvent) throws JSONException { if (!historyEvent.isHistoryEvent()) { throw new UnsupportedOperationException("Invalid Event, does not support history" + ", eventType=" + historyEvent.getEventType()); } - JSONObject jsonObject = null; + JSONObject jsonObject; switch (historyEvent.getEventType()) { case APP_LAUNCHED: jsonObject = convertAppLaunchedEvent((AppLaunchedEvent) historyEvent); @@ -341,7 +343,7 @@ private static JSONObject 
convertContainerStoppedEvent(ContainerStoppedEvent eve private static JSONObject convertDAGFinishedEvent(DAGFinishedEvent event) throws JSONException { JSONObject jsonObject = new JSONObject(); jsonObject.put(ATSConstants.ENTITY, - event.getDagID().toString()); + event.getDAGID().toString()); jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name()); @@ -384,7 +386,7 @@ private static JSONObject convertDAGFinishedEvent(DAGFinishedEvent event) throws private static JSONObject convertDAGInitializedEvent(DAGInitializedEvent event) throws JSONException { JSONObject jsonObject = new JSONObject(); jsonObject.put(ATSConstants.ENTITY, - event.getDagID().toString()); + event.getDAGID().toString()); jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name()); @@ -402,7 +404,7 @@ private static JSONObject convertDAGInitializedEvent(DAGInitializedEvent event) JSONObject otherInfo = new JSONObject(); if (event.getVertexNameIDMap() != null) { - Map nameIdStrMap = new TreeMap(); + Map nameIdStrMap = new TreeMap<>(); for (Entry entry : event.getVertexNameIDMap().entrySet()) { nameIdStrMap.put(entry.getKey(), entry.getValue().toString()); } @@ -416,7 +418,7 @@ private static JSONObject convertDAGInitializedEvent(DAGInitializedEvent event) private static JSONObject convertDAGStartedEvent(DAGStartedEvent event) throws JSONException { JSONObject jsonObject = new JSONObject(); jsonObject.put(ATSConstants.ENTITY, - event.getDagID().toString()); + event.getDAGID().toString()); jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name()); @@ -439,7 +441,7 @@ private static JSONObject convertDAGStartedEvent(DAGStartedEvent event) throws J private static JSONObject convertDAGSubmittedEvent(DAGSubmittedEvent event) throws JSONException { JSONObject jsonObject = new JSONObject(); jsonObject.put(ATSConstants.ENTITY, - event.getDagID().toString()); + event.getDAGID().toString()); jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name()); @@ -602,7 +604,7 @@ private static JSONObject convertTaskAttemptStartedEvent(TaskAttemptStartedEvent containerEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.CONTAINER_ID); JSONObject taskEntity = new JSONObject(); - taskEntity.put(ATSConstants.ENTITY, event.getTaskAttemptID().getTaskID().toString()); + taskEntity.put(ATSConstants.ENTITY, event.getTaskID().toString()); taskEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_TASK_ID.name()); relatedEntities.put(nodeEntity); @@ -667,7 +669,7 @@ private static JSONObject convertTaskStartedEvent(TaskStartedEvent event) throws // Related entities JSONArray relatedEntities = new JSONArray(); JSONObject vertexEntity = new JSONObject(); - vertexEntity.put(ATSConstants.ENTITY, event.getTaskID().getVertexID().toString()); + vertexEntity.put(ATSConstants.ENTITY, event.getVertexID().toString()); vertexEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_VERTEX_ID.name()); relatedEntities.put(vertexEntity); jsonObject.put(ATSConstants.RELATED_ENTITIES, relatedEntities); @@ -775,7 +777,7 @@ private static JSONObject convertVertexInitializedEvent(VertexInitializedEvent e // Related entities JSONArray relatedEntities = new JSONArray(); JSONObject vertexEntity = new JSONObject(); - vertexEntity.put(ATSConstants.ENTITY, event.getVertexID().getDAGId().toString()); + vertexEntity.put(ATSConstants.ENTITY, event.getDAGID().toString()); vertexEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name()); relatedEntities.put(vertexEntity); jsonObject.put(ATSConstants.RELATED_ENTITIES, 
relatedEntities); @@ -815,7 +817,7 @@ private static JSONObject convertVertexStartedEvent(VertexStartedEvent event) // Related entities JSONArray relatedEntities = new JSONArray(); JSONObject vertexEntity = new JSONObject(); - vertexEntity.put(ATSConstants.ENTITY, event.getVertexID().getDAGId().toString()); + vertexEntity.put(ATSConstants.ENTITY, event.getDAGID().toString()); vertexEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name()); relatedEntities.put(vertexEntity); jsonObject.put(ATSConstants.RELATED_ENTITIES, relatedEntities); diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/SimpleHistoryLoggingService.java b/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/SimpleHistoryLoggingService.java index 4372d8ec22..418bc3c5ea 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/SimpleHistoryLoggingService.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/logging/impl/SimpleHistoryLoggingService.java @@ -23,6 +23,7 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.tez.common.StreamHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -130,7 +131,7 @@ protected void serviceStop() throws Exception { } try { if (outputStream != null) { - outputStream.hflush(); + StreamHelper.hflushIfSupported(outputStream); outputStream.close(); } } catch (IOException ioe) { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/recovery/RecoveryService.java b/tez-dag/src/main/java/org/apache/tez/dag/history/recovery/RecoveryService.java index 8c291722e2..d51b79df76 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/recovery/RecoveryService.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/recovery/RecoveryService.java @@ -28,6 +28,7 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicBoolean; +import com.google.protobuf.CodedOutputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -84,8 +85,7 @@ public class RecoveryService extends AbstractService { private FileSystem recoveryDirFS; // FS where staging dir exists Path recoveryPath; @VisibleForTesting - public Map outputStreamMap = new - HashMap(); + public Map outputStreamMap = new HashMap<>(); private int bufferSize; @VisibleForTesting public FSDataOutputStream summaryStream; @@ -101,6 +101,31 @@ public class RecoveryService extends AbstractService { private volatile boolean drained = true; private Object waitForDrained = new Object(); + @VisibleForTesting + public static class RecoveryStream { + private final FSDataOutputStream outputStream; + private final CodedOutputStream codedOutputStream; + + RecoveryStream(FSDataOutputStream outputStream) { + this.outputStream = outputStream; + this.codedOutputStream = CodedOutputStream.newInstance(outputStream); + } + + public void write(byte[] bytes) throws IOException { + codedOutputStream.writeRawBytes(bytes); + } + + public void flush() throws IOException { + codedOutputStream.flush(); + outputStream.hflush(); + } + + public void close() throws IOException { + flush(); + outputStream.close(); + } + } + public RecoveryService(AppContext appContext) { super(RecoveryService.class.getName()); this.appContext = appContext; @@ -231,10 +256,9 @@ public void serviceStop() throws Exception { } } } - for (Entry entry : outputStreamMap.entrySet()) { + for 
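Editor's note on StreamHelper.hflushIfSupported() in the SimpleHistoryLoggingService hunk above: the helper itself is outside this section. Judging by the call site, it presumably guards hflush() behind Hadoop's StreamCapabilities probe so that streams without a durable flush (for example on some object-store filesystems) do not fail at shutdown. A plausible sketch, offered as an assumption rather than the actual implementation:

  import java.io.IOException;
  import org.apache.hadoop.fs.StreamCapabilities;
  import org.apache.hadoop.fs.Syncable;

  public final class StreamHelperSketch {
    private StreamHelperSketch() {}

    public static void hflushIfSupported(Syncable syncable) throws IOException {
      // streams that predate the capability API get the old unconditional hflush;
      // capability-aware streams are flushed only if they advertise HFLUSH
      if (!(syncable instanceof StreamCapabilities)
          || ((StreamCapabilities) syncable).hasCapability(StreamCapabilities.HFLUSH)) {
        syncable.hflush();
      }
    }
  }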
(Entry entry : outputStreamMap.entrySet()) { try { LOG.info("Closing Output Stream for DAG " + entry.getKey()); - entry.getValue().hflush(); entry.getValue().close(); } catch (IOException ioe) { if (!recoveryDirFS.exists(recoveryPath)) { @@ -259,7 +283,7 @@ private void addToEventQueue(DAGHistoryEvent event) { public void handle(DAGHistoryEvent event) throws IOException { if (stopped.get()) { - LOG.warn("Igoring event as service stopped, eventType" + LOG.warn("Ignoring event as service stopped, eventType" + event.getHistoryEvent().getEventType()); return; } @@ -276,7 +300,7 @@ public void handle(DAGHistoryEvent event) throws IOException { return; } - TezDAGID dagId = event.getDagID(); + TezDAGID dagId = event.getDAGID(); if (eventType.equals(HistoryEventType.DAG_SUBMITTED)) { DAGSubmittedEvent dagSubmittedEvent = (DAGSubmittedEvent) event.getHistoryEvent(); @@ -303,7 +327,7 @@ public void handle(DAGHistoryEvent event) throws IOException { if (event.getHistoryEvent() instanceof SummaryEvent) { synchronized (lock) { if (stopped.get()) { - LOG.warn("Igoring event as service stopped, eventType" + LOG.warn("Ignoring event as service stopped, eventType" + event.getHistoryEvent().getEventType()); return; } @@ -313,8 +337,8 @@ public void handle(DAGHistoryEvent event) throws IOException { if (summaryEvent.writeToRecoveryImmediately()) { handleRecoveryEvent(event); // outputStream may already be closed and removed - if (outputStreamMap.containsKey(event.getDagID())) { - doFlush(outputStreamMap.get(event.getDagID()), + if (outputStreamMap.containsKey(event.getDAGID())) { + doFlush(outputStreamMap.get(event.getDAGID()), appContext.getClock().getTime()); } } else { @@ -326,7 +350,7 @@ public void handle(DAGHistoryEvent event) throws IOException { } if (eventType.equals(HistoryEventType.DAG_FINISHED)) { LOG.info("DAG completed" - + ", dagId=" + event.getDagID() + + ", dagId=" + event.getDAGID() + ", queueSize=" + eventQueue.size()); completedDAGs.add(dagId); if (outputStreamMap.containsKey(dagId)) { @@ -335,7 +359,7 @@ public void handle(DAGHistoryEvent event) throws IOException { outputStreamMap.remove(dagId); } catch (IOException ioe) { LOG.warn("Error when trying to flush/close recovery file for" - + " dag, dagId=" + event.getDagID()); + + " dag, dagId=" + event.getDAGID()); } } } @@ -381,20 +405,17 @@ private void createFatalErrorFlagDir() throws IOException { protected void handleSummaryEvent(TezDAGID dagID, HistoryEventType eventType, SummaryEvent summaryEvent) throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("Handling summary event" - + ", dagID=" + dagID - + ", eventType=" + eventType); - } + LOG.debug("Handling summary event, dagID={}, eventType={}", dagID, eventType); + if (summaryStream == null) { Path summaryPath = TezCommonUtils.getSummaryRecoveryPath(recoveryPath); if (LOG.isDebugEnabled()) { LOG.debug("AppId :" + appContext.getApplicationID() + " summaryPath " + summaryPath); } - if (!recoveryDirFS.exists(summaryPath)) { - summaryStream = recoveryDirFS.create(summaryPath, false, - bufferSize); - } else { + try { + summaryStream = recoveryDirFS.create(summaryPath, false, bufferSize); + } catch (IOException e) { + LOG.error("Error handling summary event, eventType=" + eventType, e); createFatalErrorFlagDir(); return; } @@ -415,7 +436,7 @@ protected void handleRecoveryEvent(DAGHistoryEvent event) throws IOException { LOG.debug("Handling recovery event of type " + event.getHistoryEvent().getEventType()); } - TezDAGID dagID = event.getDagID(); + TezDAGID dagID = 
event.getDAGID(); if (completedDAGs.contains(dagID)) { // no need to recover completed DAGs @@ -429,48 +450,42 @@ protected void handleRecoveryEvent(DAGHistoryEvent event) throws IOException { return; } - if (!outputStreamMap.containsKey(dagID)) { + RecoveryStream recoveryStream = outputStreamMap.get(dagID); + if (recoveryStream == null) { Path dagFilePath = TezCommonUtils.getDAGRecoveryPath(recoveryPath, dagID.toString()); - FSDataOutputStream outputStream; - if (recoveryDirFS.exists(dagFilePath)) { + + try { + FSDataOutputStream outputStream = recoveryDirFS.create(dagFilePath, false, bufferSize); + LOG.debug("Opened DAG recovery file in create mode, filePath={}", dagFilePath); + recoveryStream = new RecoveryStream(outputStream); + } catch (IOException ioe) { + LOG.error("Error handling history event, eventType=" + eventType, ioe); createFatalErrorFlagDir(); return; - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Opening DAG recovery file in create mode" - + ", filePath=" + dagFilePath); - } - outputStream = recoveryDirFS.create(dagFilePath, false, bufferSize); } - outputStreamMap.put(dagID, outputStream); + outputStreamMap.put(dagID, recoveryStream); } - FSDataOutputStream outputStream = outputStreamMap.get(dagID); - if (LOG.isDebugEnabled()) { - LOG.debug("Writing recovery event to output stream" - + ", dagId=" + dagID - + ", eventType=" + eventType); - } + LOG.debug("Writing recovery event to output stream, dagId={}, eventType={}", + dagID, eventType); ++unflushedEventsCount; - outputStream.writeInt(event.getHistoryEvent().getEventType().ordinal()); - event.getHistoryEvent().toProtoStream(outputStream); + recoveryStream.codedOutputStream.writeFixed32NoTag(event.getHistoryEvent().getEventType().ordinal()); + event.getHistoryEvent().toProtoStream(recoveryStream.codedOutputStream); if (!EnumSet.of(HistoryEventType.DAG_SUBMITTED, HistoryEventType.DAG_FINISHED).contains(eventType)) { - maybeFlush(outputStream); + maybeFlush(recoveryStream); } } - private void maybeFlush(FSDataOutputStream outputStream) throws IOException { + private void maybeFlush(RecoveryStream recoveryStream) throws IOException { long currentTime = appContext.getClock().getTime(); boolean doFlush = false; if (maxUnflushedEvents >=0 && unflushedEventsCount >= maxUnflushedEvents) { - if (LOG.isDebugEnabled()) { - LOG.debug("Max unflushed events count reached. Flushing recovery data" - + ", unflushedEventsCount=" + unflushedEventsCount - + ", maxUnflushedEvents=" + maxUnflushedEvents); - } + LOG.debug("Max unflushed events count reached. 
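Editor's note: the write path above frames each recovery record as a fixed 32-bit event-type ordinal followed by one length-prefixed proto message. The matching reader is not part of this section; inferred from the writer, it would look roughly like this (dispatch shown for a single event type only):

  import java.io.IOException;
  import java.io.InputStream;
  import com.google.protobuf.CodedInputStream;
  import com.google.protobuf.ExtensionRegistry;
  import org.apache.tez.dag.history.HistoryEventType;
  import org.apache.tez.dag.recovery.records.RecoveryProtos.VertexStartedProto;

  public class RecoveryRecordReaderSketch {
    public static void readOneRecord(InputStream rawStream) throws IOException {
      CodedInputStream in = CodedInputStream.newInstance(rawStream);
      int ordinal = in.readFixed32();               // pairs with writeFixed32NoTag()
      HistoryEventType type = HistoryEventType.values()[ordinal];
      if (type == HistoryEventType.VERTEX_STARTED) {
        VertexStartedProto proto =
            in.readMessage(VertexStartedProto.PARSER, ExtensionRegistry.getEmptyRegistry());
        // ... hand proto to VertexStartedEvent.fromProto(proto)
      }
    }
  }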
Flushing recovery data, " + + "unflushedEventsCount={}, maxUnflushedEvents={}", unflushedEventsCount, + maxUnflushedEvents); doFlush = true; } else if (flushInterval >= 0 && ((currentTime - lastFlushTime) >= (flushInterval*1000))) { @@ -482,12 +497,12 @@ private void maybeFlush(FSDataOutputStream outputStream) throws IOException { if (!doFlush) { return; } - doFlush(outputStream, currentTime); + doFlush(recoveryStream, currentTime); } - private void doFlush(FSDataOutputStream outputStream, + private void doFlush(RecoveryStream recoveryStream, long currentTime) throws IOException { - outputStream.hflush(); + recoveryStream.flush(); if (LOG.isDebugEnabled()) { LOG.debug("Flushing output stream" diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/utils/DAGUtils.java b/tez-dag/src/main/java/org/apache/tez/dag/history/utils/DAGUtils.java index dce9e521cc..4c45e74101 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/utils/DAGUtils.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/utils/DAGUtils.java @@ -34,6 +34,8 @@ import org.apache.tez.common.ATSConstants; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.VersionInfo; +import org.apache.tez.common.counters.AggregateTezCounter; +import org.apache.tez.common.counters.AggregateTezCounterDelegate; import org.apache.tez.common.counters.CounterGroup; import org.apache.tez.common.counters.TezCounter; import org.apache.tez.common.counters.TezCounters; @@ -52,9 +54,9 @@ import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; -public class DAGUtils { +public final class DAGUtils { public static final String DAG_NAME_KEY = "dagName"; public static final String DAG_INFO_KEY = "dagInfo"; @@ -97,7 +99,7 @@ public class DAGUtils { public static final String VERTEX_GROUP_EDGE_MERGED_INPUTS_KEY = "edgeMergedInputs"; public static final String VERTEX_GROUP_DESTINATION_VERTEX_NAME_KEY = "destinationVertexName"; - + private DAGUtils() {} public static JSONObject generateSimpleJSONPlan(DAGPlan dagPlan) throws JSONException { JSONObject dagJson; @@ -109,7 +111,8 @@ public static JSONObject generateSimpleJSONPlan(DAGPlan dagPlan) throws JSONExce return dagJson; } - public static JSONObject convertDataEventDependencyInfoToJSON(List info) { + public static JSONObject convertDataEventDependencyInfoToJSON(List info) + throws JSONException { return new JSONObject(convertDataEventDependecyInfoToATS(info)); } @@ -137,7 +140,7 @@ public static JSONObject convertCountersToJSON(TezCounters counters) } public static Map convertCountersToATSMap(TezCounters counters) { - Map object = new LinkedHashMap(); + Map object = new LinkedHashMap(); if (counters == null) { return object; } @@ -153,6 +156,15 @@ public static Map convertCountersToATSMap(TezCounters counters) { counter.getDisplayName()); } counterMap.put(ATSConstants.COUNTER_VALUE, counter.getValue()); + if (counter instanceof AggregateTezCounter) { + counterMap.put(ATSConstants.COUNTER_INSTANCE_COUNT, + ((AggregateTezCounter)counter).getCount()); + counterMap.put(ATSConstants.COUNTER_MAX_VALUE, + ((AggregateTezCounter)counter).getMax()); + counterMap.put(ATSConstants.COUNTER_MIN_VALUE, + ((AggregateTezCounter)counter).getMin()); + + } counterList.add(counterMap); } } @@ -187,8 +199,24 @@ static Map createDagInfoMap(DAGPlan dagPlan) { return dagInfo; } - public static Map convertDAGPlanToATSMap(DAGPlan dagPlan) throws IOException { + public 
static Map convertDAGPlanToATSMap(final DAGPlan + dagPlan) throws IOException { final Inflater inflater = TezCommonUtils.newInflater(); + try { + return convertDAGPlanToATSMap(dagPlan, inflater); + } finally { + inflater.end(); + } + } + + /** + * Auxiliary method to convert dagPlan to ATS Map. + * @param dagPlan dag plan. + * @param inflater inflater. This method shouldn't end it. + * @return ATS MAP + */ + private static Map convertDAGPlanToATSMap(DAGPlan dagPlan, + final Inflater inflater) { final String VERSION_KEY = "version"; final int version = 2; Map dagMap = new LinkedHashMap(); @@ -409,7 +437,7 @@ public static Map convertVertexStatsToATSMap( } public static JSONObject convertServicePluginToJSON( - ServicePluginInfo servicePluginInfo) { + ServicePluginInfo servicePluginInfo) throws JSONException { JSONObject jsonObject = new JSONObject(convertServicePluginToATSMap(servicePluginInfo)); return jsonObject; } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/history/utils/TezEventUtils.java b/tez-dag/src/main/java/org/apache/tez/dag/history/utils/TezEventUtils.java index cc89b9fb4d..16d59c42f5 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/history/utils/TezEventUtils.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/history/utils/TezEventUtils.java @@ -32,7 +32,9 @@ import org.apache.tez.runtime.api.impl.EventType; import org.apache.tez.runtime.api.impl.TezEvent; -public class TezEventUtils { +public final class TezEventUtils { + + private TezEventUtils() {} public static TezEventProto toProto(TezEvent event) throws IOException { TezEventProto.Builder evtBuilder = diff --git a/tez-dag/src/main/java/org/apache/tez/dag/utils/ProtoUtils.java b/tez-dag/src/main/java/org/apache/tez/dag/utils/ProtoUtils.java index 56e46a0556..e179b4fd9b 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/utils/ProtoUtils.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/utils/ProtoUtils.java @@ -24,7 +24,9 @@ import com.google.protobuf.ByteString; -public class ProtoUtils { +public final class ProtoUtils { + + private ProtoUtils() {} public static RecoveryProtos.SummaryEventProto toSummaryEventProto( TezDAGID dagID, long timestamp, HistoryEventType historyEventType, byte[] payload) { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/utils/TaskSpecificLaunchCmdOption.java b/tez-dag/src/main/java/org/apache/tez/dag/utils/TaskSpecificLaunchCmdOption.java index 7d93481581..97dcda5a14 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/utils/TaskSpecificLaunchCmdOption.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/utils/TaskSpecificLaunchCmdOption.java @@ -152,7 +152,7 @@ private boolean shouldParseSpecificTaskList() { * * * @param conf - * @return a map from the vertex name to a BitSet representing tasks to be instruemented. null if + * @return a map from the vertex name to a BitSet representing tasks to be instrumented. 
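Editor's note on the Inflater handling in the DAGUtils hunk above: an Inflater owns native zlib state that is only released promptly by end(), which is why the conversion is now split so the caller can end() it in a finally block. The same pattern in plain JDK terms:

  import java.util.zip.DataFormatException;
  import java.util.zip.Inflater;

  public final class InflaterLifecycle {
    private InflaterLifecycle() {}

    public static byte[] inflate(byte[] compressed, int maxLen) throws DataFormatException {
      Inflater inflater = new Inflater();
      try {
        inflater.setInput(compressed);
        byte[] buf = new byte[maxLen];
        int n = inflater.inflate(buf);          // may decompress fewer than maxLen bytes
        byte[] result = new byte[n];
        System.arraycopy(buf, 0, result, 0, n);
        return result;
      } finally {
        inflater.end();                         // release native memory deterministically
      }
    }
  }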
null if * the provided configuration is empty or invalid */ private Map getSpecificTasks(Configuration conf) { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/utils/TezBuilderUtils.java b/tez-dag/src/main/java/org/apache/tez/dag/utils/TezBuilderUtils.java index b7e6f724f7..25551a1ccb 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/utils/TezBuilderUtils.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/utils/TezBuilderUtils.java @@ -27,7 +27,9 @@ import org.apache.tez.dag.records.TezTaskID; import org.apache.tez.dag.records.TezVertexID; -public class TezBuilderUtils { +public final class TezBuilderUtils { + + private TezBuilderUtils() {} public static TezVertexID newVertexID(TezDAGID dagId, int vertexId) { return TezVertexID.getInstance(dagId, vertexId); diff --git a/tez-dag/src/main/java/org/apache/tez/dag/utils/TezRuntimeChildJVM.java b/tez-dag/src/main/java/org/apache/tez/dag/utils/TezRuntimeChildJVM.java index 9458193dc1..5d107ed737 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/utils/TezRuntimeChildJVM.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/utils/TezRuntimeChildJVM.java @@ -28,10 +28,12 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.tez.runtime.task.TezChild; -public class TezRuntimeChildJVM { +public final class TezRuntimeChildJVM { + + private TezRuntimeChildJVM() {} // FIXME - public static enum LogName { + public enum LogName { /** Log on the stdout of the task. */ STDOUT ("stdout"), @@ -47,9 +49,9 @@ public static enum LogName { /** Log the debug script's stdout */ DEBUGOUT ("debugout"); - private String prefix; + private final String prefix; - private LogName(String prefix) { + LogName(String prefix) { this.prefix = prefix; } @@ -71,7 +73,7 @@ public static List getVMCommand( int applicationAttemptNumber, String javaOpts) { - Vector vargs = new Vector(9); + Vector vargs = new Vector<>(9); vargs.add(Environment.JAVA_HOME.$() + "/bin/java"); @@ -96,12 +98,12 @@ public static List getVMCommand( vargs.add("2>" + getTaskLogFile(LogName.STDERR)); // TODO Is this StringBuilder really required ? YARN already accepts a list of commands. - // Final commmand + // Final command StringBuilder mergedCommand = new StringBuilder(); for (CharSequence str : vargs) { mergedCommand.append(str).append(" "); } - Vector vargsFinal = new Vector(1); + Vector vargsFinal = new Vector<>(1); vargsFinal.add(mergedCommand.toString()); return vargsFinal; } diff --git a/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicator.java b/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicator.java index fceddf2522..be6ad68d55 100644 --- a/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicator.java +++ b/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicator.java @@ -237,4 +237,13 @@ public String getCompletedLogsUrl(TezTaskAttemptID attemptID, NodeId containerNo return null; } + /** + * Return the amount of memory used by the containers. Each container is supposed to refresh + * its current state via heartbeat requests, and the TaskCommunicator implementation is supposed + * to aggregate this properly. 
+ * @return memory in MB + */ + public long getTotalUsedMemory() { + return 0; + } } diff --git a/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicatorContext.java b/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicatorContext.java index b6f3a549b6..69cb4acf2e 100644 --- a/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicatorContext.java +++ b/tez-dag/src/main/java/org/apache/tez/serviceplugins/api/TaskCommunicatorContext.java @@ -41,7 +41,7 @@ public interface TaskCommunicatorContext extends ServicePluginContextBase { // - Report taskSuccess via a method instead of the heartbeat // - Add methods to signal container / task state changes // - Maybe add book-keeping as a helper library, instead of each impl tracking container to task etc. - // - Handling of containres / tasks which no longer exist in the system (formalized interface instead of a shouldDie notification) + // - Handling of containers / tasks which no longer exist in the system (formalized interface instead of a shouldDie notification) /** @@ -75,7 +75,7 @@ public interface TaskCommunicatorContext extends ServicePluginContextBase { * This method must be invoked periodically to receive updates for a running task * * @param request the update from the running task. - * @return the response that is requried by the task. + * @return the response that is required by the task. * @throws IOException * @throws TezException */ @@ -176,7 +176,7 @@ void taskFailed(TezTaskAttemptID taskAttemptId, TaskFailureType taskFailureType, /** * Get an identifier for the executing context of the DAG. - * @return a String identifier for the exeucting context. + * @return a String identifier for the executing context. */ String getCurrentAppIdentifier(); @@ -228,5 +228,4 @@ void taskFailed(TezTaskAttemptID taskAttemptId, TaskFailureType taskFailureType, * @return time when the current dag started executing */ long getDagStartTime(); - } diff --git a/tez-dag/src/main/java/org/apache/tez/state/StateMachineTez.java b/tez-dag/src/main/java/org/apache/tez/state/StateMachineTez.java index 3be7718926..cbb838d7f0 100644 --- a/tez-dag/src/main/java/org/apache/tez/state/StateMachineTez.java +++ b/tez-dag/src/main/java/org/apache/tez/state/StateMachineTez.java @@ -51,6 +51,11 @@ public STATE getCurrentState() { return realStatemachine.getCurrentState(); } + @Override + public STATE getPreviousState() { + return realStatemachine.getPreviousState(); + } + @SuppressWarnings("unchecked") @Override public STATE doTransition(EVENTTYPE eventType, EVENT event) throws diff --git a/tez-dag/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-dag/src/main/javadoc/resources/META-INF/LICENSE similarity index 100% rename from tez-dag/src/main/javadoc/resources/META-INF/LICENSE.txt rename to tez-dag/src/main/javadoc/resources/META-INF/LICENSE diff --git a/tez-dag/src/main/javadoc/resources/META-INF/NOTICE b/tez-dag/src/main/javadoc/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-dag/src/main/javadoc/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
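Editor's note on the new getTotalUsedMemory() hook above: the base class returns 0, and an implementation is expected to aggregate per-container usage reported over heartbeats (the TezHeartbeatRequest constructor in the test hunks further down gains a trailing used-memory argument, presumably the transport for this value). A sketch of the bookkeeping a plugin might keep; recordHeartbeat() and the map are hypothetical names, and the TaskCommunicator base class is omitted for brevity:

  import java.util.concurrent.ConcurrentHashMap;
  import java.util.concurrent.ConcurrentMap;
  import org.apache.hadoop.yarn.api.records.ContainerId;

  public class MemoryTrackingSketch {
    private final ConcurrentMap<ContainerId, Long> containerMemoryMb = new ConcurrentHashMap<>();

    // called from the plugin's heartbeat handling (hypothetical hook)
    public void recordHeartbeat(ContainerId id, long usedMemoryMb) {
      containerMemoryMb.put(id, usedMemoryMb);
    }

    // would back a TaskCommunicator#getTotalUsedMemory() override
    public long getTotalUsedMemory() {
      return containerMemoryMb.values().stream().mapToLong(Long::longValue).sum();
    }
  }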
+ diff --git a/tez-dag/src/main/javadoc/resources/META-INF/NOTICE.txt b/tez-dag/src/main/javadoc/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-dag/src/main/javadoc/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-dag/src/main/resources/META-INF/LICENSE.txt b/tez-dag/src/main/resources/META-INF/LICENSE similarity index 100% rename from tez-dag/src/main/resources/META-INF/LICENSE.txt rename to tez-dag/src/main/resources/META-INF/LICENSE diff --git a/tez-dag/src/main/resources/META-INF/NOTICE b/tez-dag/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-dag/src/main/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-dag/src/main/resources/META-INF/NOTICE.txt b/tez-dag/src/main/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-dag/src/main/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-dag/src/main/resources/tez-dag-version-info.properties b/tez-dag/src/main/resources/tez-dag-version-info.properties index 4bb7d40b8b..0bc30c4bb0 100644 --- a/tez-dag/src/main/resources/tez-dag-version-info.properties +++ b/tez-dag/src/main/resources/tez-dag-version-info.properties @@ -19,4 +19,6 @@ version=${pom.version} revision=${buildNumber} buildtime=${build.time} +builduser=${user.name} +buildjavaversion=${java.version} scmurl=${scm.url} diff --git a/tez-dag/src/test/java/org/apache/tez/dag/api/client/TestDAGClientHandler.java b/tez-dag/src/test/java/org/apache/tez/dag/api/client/TestDAGClientHandler.java index bf07838a45..f594df2d9c 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/api/client/TestDAGClientHandler.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/api/client/TestDAGClientHandler.java @@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.util.SystemClock; import org.apache.tez.client.TezAppMasterStatus; +import org.apache.tez.dag.api.NoCurrentDAGException; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.api.records.DAGProtos.DAGPlan; import org.apache.tez.dag.app.AppContext; @@ -51,19 +52,16 @@ public void testDAGClientHandler() throws TezException { DAG mockDAG = mock(DAG.class); when(mockDAG.getID()).thenReturn(mockTezDAGId); DAGStatusBuilder mockDagStatusBuilder = mock(DAGStatusBuilder.class); - when(mockDAG.getDAGStatus(anySetOf(StatusGetOpts.class))).thenReturn( + when(mockDAG.getDAGStatus(anySet())).thenReturn( mockDagStatusBuilder); VertexStatusBuilder mockVertexStatusBuilder = mock(VertexStatusBuilder.class); - when(mockDAG.getVertexStatus(anyString(), anySetOf(StatusGetOpts.class))) + when(mockDAG.getVertexStatus(anyString(), anySet())) .thenReturn(mockVertexStatusBuilder); - DAGAppMaster mockDagAM = mock(DAGAppMaster.class); - when(mockDagAM.getState()).thenReturn(DAGAppMasterState.RUNNING); - AppContext mockAppContext = mock(AppContext.class); - 
when(mockDagAM.getContext()).thenReturn(mockAppContext); + DAGAppMaster mockDagAM = getMockAm(); + when(mockDagAM.getContext().getCurrentDAG()).thenReturn(mockDAG); - when(mockAppContext.getClock()).thenReturn(new SystemClock()); DAGClientHandler dagClientHandler = new DAGClientHandler(mockDagAM); @@ -130,5 +128,44 @@ public void testDAGClientHandler() throws TezException { dagClientHandler.shutdownAM(); verify(mockDagAM).shutdownTezAM(contains("Received message to shutdown AM from")); } - + + @Test + public void testCurrentDAGFound() throws TezException { + TezDAGID mockTezDAGId = mock(TezDAGID.class); + when(mockTezDAGId.getId()).thenReturn(1); + when(mockTezDAGId.toString()).thenReturn("dag_9999_0001_1"); + + DAG mockDAG = mock(DAG.class); + when(mockDAG.getID()).thenReturn(mockTezDAGId); + + DAGAppMaster mockDagAM = getMockAm(); + + // make the DAGAppMaster return the mockDAG as current DAG + when(mockDagAM.getContext().getCurrentDAG()).thenReturn(mockDAG); + + DAGClientHandler dagClientHandler = new DAGClientHandler(mockDagAM); + assertEquals("dag_9999_0001_1", dagClientHandler.getDAG("dag_9999_0001_1").getID().toString()); + } + + @Test(expected = NoCurrentDAGException.class) + public void testNoCurrentDAGException() throws TezException { + DAGAppMaster mockDagAM = getMockAm(); + + // make the DAGAppMaster return null as current DAG + when(mockDagAM.getContext().getCurrentDAG()).thenReturn(null); + + // so this should throw NoCurrentDAGException + new DAGClientHandler(mockDagAM).getDAG("dag_0000_0000_0"); + } + + private DAGAppMaster getMockAm() { + DAGAppMaster mockDagAM = mock(DAGAppMaster.class); + when(mockDagAM.getState()).thenReturn(DAGAppMasterState.RUNNING); + + AppContext mockAppContext = mock(AppContext.class); + when(mockDagAM.getContext()).thenReturn(mockAppContext); + when(mockAppContext.getClock()).thenReturn(new SystemClock()); + + return mockDagAM; + } } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClientAMProtocolBlockingPBServerImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClientAMProtocolBlockingPBServerImpl.java index 040ca2fb7b..5f6552ee81 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClientAMProtocolBlockingPBServerImpl.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/api/client/rpc/TestDAGClientAMProtocolBlockingPBServerImpl.java @@ -28,8 +28,11 @@ import static junit.framework.TestCase.assertEquals; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; -import static org.mockito.Mockito.*; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; @@ -72,7 +75,7 @@ public void init() { @SuppressWarnings("unchecked") public void testSubmitDagInSessionWithLargeDagPlan() throws Exception { int maxIPCMsgSize = 1024; - String dagPlanName = "dagplan-name"; + String dagPlanName = "DAG-testSubmitDagInSessionWithLargeDagPlan"; File requestFile = tmpFolder.newFile("request-file"); TezConfiguration conf = new TezConfiguration(); conf.setInt(CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH, maxIPCMsgSize); diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockClock.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockClock.java index d01571446d..c7cf74eb84 
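Editor's note on the matcher changes in the test hunks above: Mockito 2 deprecated org.mockito.Matchers and its typed anySetOf(Class) helper, and later major versions dropped them; the replacement is the untyped anySet() from org.mockito.ArgumentMatchers, relying on generic inference. A minimal before/after, with a made-up Lookup interface:

  import java.util.Collections;
  import java.util.Set;
  import static org.mockito.ArgumentMatchers.anySet;
  import static org.mockito.Mockito.mock;
  import static org.mockito.Mockito.when;

  public class MatcherMigrationSketch {
    interface Lookup { String resolve(Set<String> keys); }

    public static void main(String[] args) {
      Lookup lookup = mock(Lookup.class);
      when(lookup.resolve(anySet())).thenReturn("stubbed");  // was: anySetOf(String.class)
      System.out.println(lookup.resolve(Collections.singleton("k")));  // prints "stubbed"
    }
  }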
100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockClock.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockClock.java @@ -20,9 +20,21 @@ import org.apache.hadoop.yarn.util.Clock; +import java.util.Collection; +import java.util.LinkedList; + public class MockClock implements Clock { - long time = 1000; + long time; + Collection listeners = new LinkedList<>(); + + public MockClock() { + this(1000); + } + + public MockClock(long initTime) { + time = initTime; + } @Override public long getTime() { @@ -31,6 +43,16 @@ public long getTime() { public void incrementTime(long inc) { time += inc; + for (MockClockListener listener : listeners) { + listener.onTimeUpdated(time); + } + } + + public void register(MockClockListener listener) { + listeners.add(listener); } + public interface MockClockListener { + void onTimeUpdated(long newTime); + } } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java index 893e03d3a1..b3ddaa0f86 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java @@ -57,6 +57,8 @@ import org.apache.tez.client.TezApiVersionInfo; import org.apache.tez.common.ContainerContext; import org.apache.tez.common.ContainerTask; +import org.apache.tez.common.GuavaShim; +import org.apache.tez.common.Preconditions; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezUncheckedException; @@ -73,7 +75,6 @@ import org.apache.tez.runtime.api.impl.TezEvent; import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.util.concurrent.FutureCallback; @@ -323,7 +324,7 @@ public void run() { Worker worker = workers.remove(); worker.setContainerData(cData); ListenableFuture future = executorService.submit(worker); - Futures.addCallback(future, worker.getCallback()); + Futures.addCallback(future, worker.getCallback(), GuavaShim.directExecutor()); } else { containers.remove(cData.cId); } @@ -430,7 +431,7 @@ public Void call() throws Exception { EventProducerConsumerType.SYSTEM, cData.vName, "", cData.taId), MockDAGAppMaster.this.getContext().getClock().getTime())); TezHeartbeatRequest request = new TezHeartbeatRequest(cData.numUpdates, events, - cData.nextPreRoutedFromEventId, cData.cIdStr, cData.taId, cData.nextFromEventId, 50000); + cData.nextPreRoutedFromEventId, cData.cIdStr, cData.taId, cData.nextFromEventId, 50000, 0); doHeartbeat(request, cData); } else if (version != null && cData.taId.getId() <= version.intValue()) { preemptContainer(cData); @@ -442,7 +443,7 @@ public Void call() throws Exception { EventProducerConsumerType.SYSTEM, cData.vName, "", cData.taId), MockDAGAppMaster.this.getContext().getClock().getTime())); TezHeartbeatRequest request = new TezHeartbeatRequest(++cData.numUpdates, events, - cData.nextPreRoutedFromEventId, cData.cIdStr, cData.taId, cData.nextFromEventId, 10000); + cData.nextPreRoutedFromEventId, cData.cIdStr, cData.taId, cData.nextFromEventId, 10000, 0); doHeartbeat(request, cData); cData.clear(); } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockLocalClient.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockLocalClient.java index 552651616e..c3355473d1 100644 --- 
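Editor's note: the MockClock changes above add an explicit start time and a listener callback fired on every incrementTime(). A short usage sketch, grounded in the hunks themselves:

  import org.apache.tez.dag.app.MockClock;

  public class MockClockDemo {
    public static void main(String[] args) {
      MockClock clock = new MockClock(5000);   // new constructor with explicit start time
      clock.register(newTime -> System.out.println("clock moved to " + newTime));
      clock.incrementTime(1000);               // fires onTimeUpdated(6000)
    }
  }

(The Futures.addCallback() change in the MockDAGAppMaster hunk is the usual Guava migration: newer Guava requires an explicit Executor argument, and GuavaShim.directExecutor() presumably forwards to MoreExecutors.directExecutor().)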
a/tez-dag/src/test/java/org/apache/tez/dag/app/MockLocalClient.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockLocalClient.java @@ -38,7 +38,7 @@ public class MockLocalClient extends LocalClient { public MockLocalClient(AtomicBoolean mockAppLauncherGoFlag, Clock clock) { this(mockAppLauncherGoFlag, clock, false, false, 1, 1); } - + public MockLocalClient(AtomicBoolean mockAppLauncherGoFlag, Clock clock, boolean initFailFlag, boolean startFailFlag, int concurrency, int containers) { this.mockAppLauncherGoFlag = mockAppLauncherGoFlag; @@ -60,7 +60,7 @@ protected DAGAppMaster createDAGAppMaster(ApplicationAttemptId applicationAttemp concurrency, containers); return mockApp; } - + public MockDAGAppMaster getMockApp() { return mockApp; } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/PluginWrapperTestHelpers.java b/tez-dag/src/test/java/org/apache/tez/dag/app/PluginWrapperTestHelpers.java index fb6faa1bb6..5009cefffd 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/PluginWrapperTestHelpers.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/PluginWrapperTestHelpers.java @@ -14,9 +14,7 @@ package org.apache.tez.dag.app; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import static org.mockito.Mockito.mock; import java.lang.reflect.Constructor; @@ -30,15 +28,17 @@ import org.slf4j.LoggerFactory; -public class PluginWrapperTestHelpers { +public final class PluginWrapperTestHelpers { private static final Logger LOG = LoggerFactory.getLogger(PluginWrapperTestHelpers.class); + private PluginWrapperTestHelpers() {} + public static void testDelegation(Class delegateClass, Class rawClass, Set skipMethods) throws Exception { TrackingAnswer answer = new TrackingAnswer(); Object mock = mock(rawClass, answer); - Constructor ctor = delegateClass.getConstructor(rawClass); + Constructor ctor = delegateClass.getConstructor(rawClass); Object wrapper = ctor.newInstance(mock); // Run through all the methods on the wrapper, and invoke the methods. 
Constructs @@ -48,7 +48,7 @@ public static void testDelegation(Class delegateClass, Class rawClass, if (method.getDeclaringClass().equals(delegateClass) && !skipMethods.contains(method.getName())) { - assertTrue(method.getExceptionTypes().length == 1); + assertEquals(1, method.getExceptionTypes().length); assertEquals(Exception.class, method.getExceptionTypes()[0]); LOG.info("Checking method [{}] with parameterTypes [{}]", method.getName(), Arrays.toString(method.getParameterTypes())); @@ -65,8 +65,8 @@ public static void testDelegation(Class delegateClass, Class rawClass, if (answer.compareAsPrimitive) { assertEquals(answer.lastRetValue, result); } else { - assertTrue("Expected: " + System.identityHashCode(answer.lastRetValue) + ", actual=" + - System.identityHashCode(result), answer.lastRetValue == result); + assertSame("Expected: " + System.identityHashCode(answer.lastRetValue) + ", actual=" + + System.identityHashCode(result), answer.lastRetValue, result); } } } @@ -74,8 +74,7 @@ public static void testDelegation(Class delegateClass, Class rawClass, } - public static Object[] constructMethodArgs(Method method) throws IllegalAccessException, - InstantiationException { + public static Object[] constructMethodArgs(Method method) { Class[] paramTypes = method.getParameterTypes(); Object[] params = new Object[paramTypes.length]; for (int i = 0; i < paramTypes.length; i++) { @@ -112,7 +111,7 @@ private static Object getValueForPrimitiveOrString(Class clazz) { } else if (clazz.equals(int.class)) { return 224; } else if (clazz.equals(long.class)) { - return 445l; + return 445L; } else if (clazz.equals(float.class)) { return 2.24f; } else if (clazz.equals(double.class)) { diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestDAGAppMaster.java index 570c6dcd95..46e8c98510 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestDAGAppMaster.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestDAGAppMaster.java @@ -14,46 +14,33 @@ package org.apache.tez.dag.app; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; - -import java.io.ByteArrayInputStream; -import java.io.DataInput; -import java.io.DataInputStream; -import java.io.DataOutput; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.LinkedList; -import java.util.List; - -import com.google.common.base.Preconditions; import com.google.common.collect.BiMap; import com.google.common.collect.HashBiMap; import com.google.common.collect.Lists; import com.google.protobuf.ByteString; - import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.test.LambdaTestUtils; +import 
org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.util.MonotonicClock; import org.apache.hadoop.yarn.util.SystemClock; import org.apache.tez.client.TezApiVersionInfo; +import org.apache.tez.common.Preconditions; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezUtils; import org.apache.tez.common.security.JobTokenIdentifier; @@ -63,6 +50,7 @@ import org.apache.tez.dag.api.NamedEntityDescriptor; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezConstants; +import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.api.UserPayload; import org.apache.tez.dag.api.records.DAGProtos; import org.apache.tez.dag.api.records.DAGProtos.AMPluginDescriptorProto; @@ -70,13 +58,45 @@ import org.apache.tez.dag.api.records.DAGProtos.PlanLocalResourcesProto; import org.apache.tez.dag.api.records.DAGProtos.TezNamedEntityDescriptorProto; import org.apache.tez.dag.api.records.DAGProtos.TezUserPayloadProto; +import org.apache.tez.dag.app.dag.DAGState; +import org.apache.tez.dag.app.dag.Vertex; import org.apache.tez.dag.app.dag.impl.DAGImpl; import org.apache.tez.dag.app.rm.TaskSchedulerManager; import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.dag.records.TezVertexID; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; + +import java.io.ByteArrayInputStream; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.lang.reflect.Field; +import java.nio.ByteBuffer; +import java.time.Instant; +import java.util.Date; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class TestDAGAppMaster { @@ -86,15 +106,13 @@ public class TestDAGAppMaster { private static final String CL_NAME = "CL"; private static final String TC_NAME = "TC"; private static final String CLASS_SUFFIX = "_CLASS"; - private static final File TEST_DIR = new File( - System.getProperty("test.build.data", - System.getProperty("java.io.tmpdir")), - TestDAGAppMaster.class.getSimpleName()).getAbsoluteFile(); + private static final File TEST_DIR = new File(System.getProperty("test.build.data"), + TestDAGAppMaster.class.getName()).getAbsoluteFile(); @Before public void setup() { FileUtil.fullyDelete(TEST_DIR); - TEST_DIR.mkdir(); + TEST_DIR.mkdirs(); } @After @@ -326,6 +344,97 @@ public void testParseAllPluginsCustomAndYarnSpecified() throws IOException { assertEquals(TC_NAME + CLASS_SUFFIX, tcDescriptors.get(1).getClassName()); } + @Test(timeout = 60000) + public void testShutdownTezAMWithMissingRecoveryAndFailureOnMissingData() throws Exception { + + TezConfiguration conf = new TezConfiguration(); + 
conf.setBoolean(TezConfiguration.TEZ_AM_CREDENTIALS_MERGE, true); + conf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); + conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, TEST_DIR.toString()); + conf.setBoolean(TezConfiguration.TEZ_AM_FAILURE_ON_MISSING_RECOVERY_DATA, true); + conf.setBoolean(TezConfiguration.DAG_RECOVERY_ENABLED, true); + + /* + Setting very high timeout because in case when TEZ_AM_FAILURE_ON_MISSING_RECOVERY_DATA is set, it should + not time out, it should get shutdown earlier only without the timeout flow kicking in + */ + conf.setInt(TezConfiguration.TEZ_SESSION_AM_DAG_SUBMIT_TIMEOUT_SECS, 1000000000); + ApplicationId appId = ApplicationId.newInstance(1, 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 2); + + FileSystem mockFs = mock(FileSystem.class); + when(mockFs.exists(any())).thenReturn(false); + + DAGAppMasterForTest dam = new DAGAppMasterForTest(attemptId, true); + + dam.init(conf); + Field field = DAGAppMasterForTest.class.getSuperclass().getDeclaredField("recoveryFS"); + field.setAccessible(true); + field.set(dam, mockFs); + + dam.start(); + + ArgumentCaptor captor = ArgumentCaptor.forClass(Path.class); + // This ensures that recovery data file system was called for getting summary files, and it will return false + verify(mockFs, times(2)).exists(captor.capture()); + + Assert.assertTrue(captor.getAllValues().get(1).toString().contains("/recovery/1/summary")); + Assert.assertTrue(captor.getAllValues().get(0).toString().contains("/recovery/1/RecoveryFatalErrorOccurred")); + + verify(dam.mockScheduler).setShouldUnregisterFlag(); + verify(dam.mockShutdown).shutdown(); + + /* + * Since the TEZ_AM_FAILURE_ON_MISSING_RECOVERY_DATA config is set, + * DAG will be in ERRORed state if recovery was missing for attempts > 1 + */ + assertEquals(DAGAppMasterState.ERROR, dam.getState()); + } + + @Test + public void testShutdownTezAMWithMissingRecoveryAndNoFailureOnMissingData() throws Exception { + + TezConfiguration conf = new TezConfiguration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CREDENTIALS_MERGE, true); + conf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); + conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, TEST_DIR.toString()); + conf.setBoolean(TezConfiguration.TEZ_AM_FAILURE_ON_MISSING_RECOVERY_DATA, false); + conf.setBoolean(TezConfiguration.DAG_RECOVERY_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_SESSION_AM_DAG_SUBMIT_TIMEOUT_SECS, 1); + ApplicationId appId = ApplicationId.newInstance(1, 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 2); + + FileSystem mockFs = mock(FileSystem.class); + when(mockFs.exists(any())).thenReturn(false); + + DAGAppMasterForTest dam = new DAGAppMasterForTest(attemptId, true); + + dam.init(conf); + Field field = DAGAppMasterForTest.class.getSuperclass().getDeclaredField("recoveryFS"); + field.setAccessible(true); + field.set(dam, mockFs); + + dam.start(); + // Waiting for session timeout interval to kick in, which is set to 1 s + Thread.sleep(2000); + + ArgumentCaptor captor = ArgumentCaptor.forClass(Path.class); + // This ensures that recovery data file system was called for getting summary files, and it will return false + verify(mockFs, times(2)).exists(captor.capture()); + + Assert.assertTrue(captor.getAllValues().get(1).toString().contains("/recovery/1/summary")); + Assert.assertTrue(captor.getAllValues().get(0).toString().contains("/recovery/1/RecoveryFatalErrorOccurred")); + + verify(dam.mockScheduler).setShouldUnregisterFlag(); + 
verify(dam.mockShutdown).shutdown(); + + /* + * Since the TEZ_AM_FAILURE_ON_MISSING_RECOVERY_DATA config is unset, + * DAG will be in SUCCEEDED state if recovery was missing and timeout got triggered for attempts > 1 + */ + assertEquals(DAGAppMasterState.SUCCEEDED, dam.getState()); + } + private void verifyDescAndMap(List descriptors, BiMap map, int numExpected, boolean verifyPayload, String... expectedNames) throws @@ -387,6 +496,107 @@ public void testDagCredentialsWithMerge() throws Exception { testDagCredentials(true); } + @Test + public void testGetACLFailure() throws Exception { + ApplicationId appId = ApplicationId.newInstance(1, 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 2); + DAGAppMasterForTest dam = new DAGAppMasterForTest(attemptId, true); + TezConfiguration conf = new TezConfiguration(false); + conf.setBoolean(TezConfiguration.DAG_RECOVERY_ENABLED, false); + dam.init(conf); + LambdaTestUtils.intercept(TezUncheckedException.class, + "Cannot get ApplicationACLs before all services have started, The current service state is INITED", + () -> dam.getContext().getApplicationACLs()); + dam.start(); + dam.stop(); + Mockito.when(dam.mockShutdown.getShutdownTime()).thenReturn(Date.from(Instant.ofEpochMilli(Time.now()))); + LambdaTestUtils.intercept(TezUncheckedException.class, + " Cannot get ApplicationACLs before all services have started, " + + "The current service state is STOPPED. The shutdown hook started at " + + dam.mockShutdown.getShutdownTime(), () -> dam.getContext().getApplicationACLs()); + } + + @Test + public void testBadProgress() throws Exception { + TezConfiguration conf = new TezConfiguration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CREDENTIALS_MERGE, true); + conf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); + conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, TEST_DIR.toString()); + ApplicationId appId = ApplicationId.newInstance(1, 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1); + + // create some sample AM credentials + Credentials amCreds = new Credentials(); + JobTokenSecretManager jtsm = new JobTokenSecretManager(); + JobTokenIdentifier identifier = new JobTokenIdentifier( + new Text(appId.toString())); + Token sessionToken = + new Token(identifier, jtsm); + sessionToken.setService(identifier.getJobId()); + TokenCache.setSessionToken(sessionToken, amCreds); + TestTokenSecretManager ttsm = new TestTokenSecretManager(); + Text tokenAlias1 = new Text("alias1"); + Token amToken1 = new Token( + new TestTokenIdentifier(new Text("amtoken1")), ttsm); + amCreds.addToken(tokenAlias1, amToken1); + + FileSystem fs = FileSystem.getLocal(conf); + FSDataOutputStream sessionJarsPBOutStream = + TezCommonUtils.createFileForAM(fs, new Path(TEST_DIR.toString(), + TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME)); + DAGProtos.PlanLocalResourcesProto.getDefaultInstance() + .writeDelimitedTo(sessionJarsPBOutStream); + sessionJarsPBOutStream.close(); + DAGAppMaster am = spy(new DAGAppMaster(attemptId, + ContainerId.newContainerId(attemptId, 1), + "127.0.0.1", 0, 0, new MonotonicClock(), 1, true, + TEST_DIR.toString(), new String[] {TEST_DIR.toString()}, + new String[] {TEST_DIR.toString()}, + new TezApiVersionInfo().getVersion(), amCreds, + "someuser", null)); + when(am.getState()).thenReturn(DAGAppMasterState.RUNNING); + am.init(conf); + am.start(); + Credentials dagCreds = new Credentials(); + Token dagToken1 = new Token( + new TestTokenIdentifier(new Text("dagtoken1")), ttsm); + 
dagCreds.addToken(tokenAlias1, dagToken1); + Text tokenAlias3 = new Text("alias3"); + Token dagToken2 = new Token( + new TestTokenIdentifier(new Text("dagtoken2")), ttsm); + dagCreds.addToken(tokenAlias3, dagToken2); + TezDAGID dagId = TezDAGID.getInstance(appId, 1); + DAGPlan dagPlan = DAGPlan.newBuilder() + .setName("somedag") + .setCredentialsBinary( + DagTypeConverters.convertCredentialsToProto(dagCreds)) + .build(); + DAGImpl dag = spy(am.createDAG(dagPlan, dagId)); + am.setCurrentDAG(dag); + when(dag.getState()).thenReturn(DAGState.RUNNING); + Map map = new HashMap(); + TezVertexID mockVertexID = mock(TezVertexID.class); + Vertex mockVertex = mock(Vertex.class); + when(mockVertex.getProgress()).thenReturn(Float.NaN); + map.put(mockVertexID, mockVertex); + when(dag.getVertices()).thenReturn(map); + when(dag.getTotalVertices()).thenReturn(1); + Assert.assertEquals("Progress was NaN and should be reported as 0", + 0, am.getProgress(), 0); + when(mockVertex.getProgress()).thenReturn(-10f); + Assert.assertEquals("Progress was negative and should be reported as 0", + 0, am.getProgress(), 0); + when(mockVertex.getProgress()).thenReturn(1.0000567f); + Assert.assertEquals( + "Progress was greater than 1 by a small float precision " + + "1.0000567 and should be reported as 1", + 1.0f, am.getProgress(), 0.0f); + when(mockVertex.getProgress()).thenReturn(10f); + Assert.assertEquals( + "Progress was greater than 1 and should be reported as 1", + 1.0f, am.getProgress(), 0.0f); + } + @SuppressWarnings("deprecation") private void testDagCredentials(boolean doMerge) throws IOException { TezConfiguration conf = new TezConfiguration(); @@ -540,9 +750,10 @@ public TestTokenIdentifier createIdentifier() { } } - private static class DAGAppMasterForTest extends DAGAppMaster { + public static class DAGAppMasterForTest extends DAGAppMaster { private DAGAppMasterShutdownHandler mockShutdown; private TaskSchedulerManager mockScheduler = mock(TaskSchedulerManager.class); + private DAGAppMasterReadinessService mockAppMasterReadinessService = mock(DAGAppMasterReadinessService.class); public DAGAppMasterForTest(ApplicationAttemptId attemptId, boolean isSession) { super(attemptId, ContainerId.newContainerId(attemptId, 1), "hostname", 12345, 12346, @@ -551,7 +762,7 @@ public DAGAppMasterForTest(ApplicationAttemptId attemptId, boolean isSession) { new TezDagVersionInfo().getVersion(), createCredentials(), "jobname", null); } - private static Credentials createCredentials() { + public static Credentials createCredentials() { Credentials creds = new Credentials(); JobTokenSecretManager jtsm = new JobTokenSecretManager(); JobTokenIdentifier jtid = new JobTokenIdentifier(new Text()); @@ -560,9 +771,10 @@ private static Credentials createCredentials() { return creds; } - private static void stubSessionResources() throws IOException { - FileOutputStream out = new FileOutputStream( - new File(TEST_DIR, TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME)); + private static void stubSessionResources(Configuration conf) throws IOException { + File file = new File(TEST_DIR, TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME); + conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, TEST_DIR.getAbsolutePath()); + FileOutputStream out = new FileOutputStream(file); PlanLocalResourcesProto planProto = PlanLocalResourcesProto.getDefaultInstance(); planProto.writeDelimitedTo(out); out.close(); @@ -570,7 +782,7 @@ private static void stubSessionResources() throws IOException { @Override public synchronized void serviceInit(Configuration conf) 
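Editor's note: the assertions in testBadProgress above imply that DAGAppMaster.getProgress() sanitizes vertex progress before reporting it. The AM-side code is not in this section; the rule the test encodes reduces to roughly the following sketch:

  public final class ProgressClampSketch {
    private ProgressClampSketch() {}

    static float sanitizeProgress(float progress) {
      if (Float.isNaN(progress) || progress < 0f) {
        return 0f;                          // NaN and negative values report as 0
      }
      return Math.min(progress, 1.0f);      // >1 (including float overshoot) clamps to 1
    }
  }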
throws Exception { - stubSessionResources(); + stubSessionResources(conf); conf.setBoolean(TezConfiguration.TEZ_AM_WEBSERVICE_ENABLE, false); super.serviceInit(conf); } @@ -586,5 +798,10 @@ protected TaskSchedulerManager createTaskSchedulerManager( List taskSchedulerDescriptors) { return mockScheduler; } + + @Override + protected DAGAppMasterReadinessService createAppMasterReadinessService() { + return mockAppMasterReadinessService; + } } } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java index 6268912b2f..6ec73e74e1 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestMockDAGAppMaster.java @@ -45,6 +45,8 @@ import org.apache.hadoop.yarn.api.records.URL; import org.apache.log4j.Level; import org.apache.log4j.Logger; +import org.apache.tez.common.counters.AggregateFrameworkCounter; +import org.apache.tez.common.counters.AggregateTezCounterDelegate; import org.apache.tez.common.counters.CounterGroup; import org.apache.tez.common.counters.DAGCounter; import org.apache.tez.common.counters.TezCounters; @@ -213,7 +215,8 @@ public void testInternalPreemption() throws Exception { TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0); TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0); TaskAttempt killedTa = dagImpl.getVertex(vA.getName()).getTask(0).getAttempt(killedTaId); - Assert.assertEquals(TaskAttemptState.KILLED, killedTa.getState()); + //Refer to TEZ-3950 + Assert.assertTrue(killedTa.getState().equals(TaskAttemptState.KILLED) || killedTa.getState().equals(TaskAttemptState.FAILED)); tezClient.stop(); } @@ -258,7 +261,7 @@ public void testBasicEvents() throws Exception { Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState()); VertexImpl vImpl = (VertexImpl) dagImpl.getVertex(vB.getName()); TaskImpl tImpl = (TaskImpl) vImpl.getTask(1); - TezTaskAttemptID taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0); + TezTaskAttemptID taId = TezTaskAttemptID.getInstance(tImpl.getTaskID(), 0); List tEvents = vImpl.getTaskAttemptTezEvents(taId, 0, 0, 1000).getEvents(); Assert.assertEquals(2, tEvents.size()); // 2 from vA Assert.assertEquals(vA.getName(), tEvents.get(0).getDestinationInfo().getEdgeVertexName()); @@ -272,7 +275,7 @@ public void testBasicEvents() throws Exception { (targetIndex1 == 0 && targetIndex2 == 1) || (targetIndex1 == 1 && targetIndex2 == 0)); vImpl = (VertexImpl) dagImpl.getVertex(vC.getName()); tImpl = (TaskImpl) vImpl.getTask(1); - taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0); + taId = TezTaskAttemptID.getInstance(tImpl.getTaskID(), 0); tEvents = vImpl.getTaskAttemptTezEvents(taId, 0, 0, 1000).getEvents(); Assert.assertEquals(2, tEvents.size()); // 2 from vA Assert.assertEquals(vA.getName(), tEvents.get(0).getDestinationInfo().getEdgeVertexName()); @@ -286,7 +289,7 @@ public void testBasicEvents() throws Exception { (targetIndex1 == 0 && targetIndex2 == 1) || (targetIndex1 == 1 && targetIndex2 == 0)); vImpl = (VertexImpl) dagImpl.getVertex(vD.getName()); tImpl = (TaskImpl) vImpl.getTask(1); - taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0); + taId = TezTaskAttemptID.getInstance(tImpl.getTaskID(), 0); tEvents = vImpl.getTaskAttemptTezEvents(taId, 0, 0, 1000).getEvents(); Assert.assertEquals(1, tEvents.size()); // 1 from vA Assert.assertEquals(vA.getName(), 
tEvents.get(0).getDestinationInfo().getEdgeVertexName()); @@ -297,8 +300,7 @@ public void testBasicEvents() throws Exception { } public static class LegacyEdgeTestEdgeManager extends EdgeManagerPlugin { - List destinationInputIndices = - Collections.unmodifiableList(Collections.singletonList(0)); + List destinationInputIndices = Collections.singletonList(0); public LegacyEdgeTestEdgeManager(EdgeManagerPluginContext context) { super(context); } @@ -396,17 +398,17 @@ public void testMixedEdgeRouting() throws Exception { // vC uses on demand routing and its task does not provide events VertexImpl vImpl = (VertexImpl) dagImpl.getVertex(vC.getName()); TaskImpl tImpl = (TaskImpl) vImpl.getTask(0); - TezTaskAttemptID taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0); + TezTaskAttemptID taId = TezTaskAttemptID.getInstance(tImpl.getTaskID(), 0); Assert.assertEquals(0, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size()); // vD is mixed mode and only 1 out of 2 edges does legacy routing with task providing events vImpl = (VertexImpl) dagImpl.getVertex(vD.getName()); tImpl = (TaskImpl) vImpl.getTask(0); - taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0); + taId = TezTaskAttemptID.getInstance(tImpl.getTaskID(), 0); Assert.assertEquals(1, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size()); // vE has single legacy edge and does not use on demand routing and its task provides events vImpl = (VertexImpl) dagImpl.getVertex(vE.getName()); tImpl = (TaskImpl) vImpl.getTask(0); - taId = TezTaskAttemptID.getInstance(tImpl.getTaskId(), 0); + taId = TezTaskAttemptID.getInstance(tImpl.getTaskID(), 0); Assert.assertEquals(1, tImpl.getTaskAttemptTezEvents(taId, 0, 1000).size()); tezClient.stop(); @@ -457,6 +459,88 @@ public void launch(ContainerLaunchRequest event) { tezClient.stop(); } + @Test + public void testCountersAggregation() throws Exception { + TezConfiguration tezconf = new TezConfiguration(defaultConf); + MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null, + null, false, false); + tezClient.start(); + + final String vAName = "A"; + final String vBName = "B"; + final String procCounterName = "Proc"; + final String globalCounterName = "Global"; + DAG dag = DAG.create("testCountersAggregation"); + Vertex vA = Vertex.create(vAName, ProcessorDescriptor.create("Proc.class"), 10); + Vertex vB = Vertex.create(vBName, ProcessorDescriptor.create("Proc.class"), 1); + dag.addVertex(vA) + .addVertex(vB) + .addEdge( + Edge.create(vA, vB, EdgeProperty.create(DataMovementType.SCATTER_GATHER, + DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, + OutputDescriptor.create("Out"), InputDescriptor.create("In")))); + TezCounters temp = new TezCounters(); + temp.findCounter(new String(globalCounterName), new String(globalCounterName)).increment(1); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutput out = new DataOutputStream(bos); + temp.write(out); + final byte[] payload = bos.toByteArray(); + + MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp(); + MockContainerLauncher mockLauncher = mockApp.getContainerLauncher(); + mockLauncher.startScheduling(false); + mockApp.countersDelegate = new CountersDelegate() { + int counterValue = 0; + @Override + public TezCounters getCounters(TaskSpec taskSpec) { + String vName = taskSpec.getVertexName(); + TezCounters counters = new TezCounters(); + final DataInputByteBuffer in = new DataInputByteBuffer(); + in.reset(ByteBuffer.wrap(payload)); + try { + // this ensures that the serde code path is 
covered. + // the internal merges of counters covers the constructor code path. + counters.readFields(in); + } catch (IOException e) { + Assert.fail(e.getMessage()); + } + counters.findCounter(vName, procCounterName).setValue(++counterValue); + for (OutputSpec output : taskSpec.getOutputs()) { + counters.findCounter(vName, output.getDestinationVertexName()).setValue(++counterValue); + } + for (InputSpec input : taskSpec.getInputs()) { + counters.findCounter(vName, input.getSourceVertexName()).setValue(++counterValue); + } + return counters; + } + }; + mockApp.doSleep = false; + DAGClient dagClient = tezClient.submitDAG(dag); + mockLauncher.waitTillContainersLaunched(); + DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG(); + mockLauncher.startScheduling(true); + DAGStatus status = dagClient.waitForCompletion(); + Assert.assertEquals(DAGStatus.State.SUCCEEDED, status.getState()); + TezCounters counters = dagImpl.getAllCounters(); + + // verify processor counters + VertexImpl vAImpl = (VertexImpl) dagImpl.getVertex(vAName); + VertexImpl vBImpl = (VertexImpl) dagImpl.getVertex(vBName); + TezCounters vACounters = vAImpl.getAllCounters(); + TezCounters vBCounters = vBImpl.getAllCounters(); + + Assert.assertEquals(19, ((AggregateTezCounterDelegate)vACounters.findCounter(vAName, procCounterName)).getMax()); + Assert.assertEquals(1, ((AggregateTezCounterDelegate)vACounters.findCounter(vAName, procCounterName)).getMin()); + Assert.assertEquals(20, ((AggregateTezCounterDelegate)vACounters.findCounter(vAName, vBName)).getMax()); + Assert.assertEquals(2, ((AggregateTezCounterDelegate)vACounters.findCounter(vAName, vBName)).getMin()); + + Assert.assertEquals(21, ((AggregateTezCounterDelegate)vBCounters.findCounter(vBName, procCounterName)).getMin()); + Assert.assertEquals(21, ((AggregateTezCounterDelegate)vBCounters.findCounter(vBName, procCounterName)).getMax()); + Assert.assertEquals(22, ((AggregateTezCounterDelegate)vBCounters.findCounter(vBName, vAName)).getMin()); + Assert.assertEquals(22, ((AggregateTezCounterDelegate)vBCounters.findCounter(vBName, vAName)).getMax()); + + tezClient.stop(); + } @Test (timeout = 10000) public void testBasicCounters() throws Exception { @@ -816,7 +900,7 @@ public void testMultipleSubmissions() throws Exception { lrVertex.put(lrName2, LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test1"), LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1)); - DAG dag = DAG.create("test").addTaskLocalFiles(lrDAG); + DAG dag = DAG.create("DAG-testMultipleSubmissions").addTaskLocalFiles(lrDAG); Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5).addTaskLocalFiles(lrVertex); dag.addVertex(vA); @@ -873,7 +957,7 @@ public void testInitFailed() throws Exception { tezClient.start(); } catch (Exception e) { e.printStackTrace(); - Assert.assertEquals("FailInit", e.getCause().getCause().getMessage()); + Assert.assertEquals("FailInit", e.getCause().getCause().getCause().getMessage()); MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp(); // will timeout if DAGAppMasterShutdownHook is not invoked mockApp.waitForServiceToStop(Integer.MAX_VALUE); @@ -889,7 +973,7 @@ public void testStartFailed() { tezClient.start(); } catch (Exception e) { e.printStackTrace(); - Assert.assertEquals("FailStart", e.getCause().getCause().getMessage()); + Assert.assertEquals("FailStart", e.getCause().getCause().getCause().getMessage()); MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp(); // will timeout if 
DAGAppMasterShutdownHook is not invoked mockApp.waitForServiceToStop(Integer.MAX_VALUE); @@ -1128,7 +1212,7 @@ public void testDAGFinishedRecoveryError() throws Exception { MockContainerLauncher mockLauncher = mockApp.getContainerLauncher(); mockLauncher.startScheduling(true); - DAG dag = DAG.create("test"); + DAG dag = DAG.create("DAG-testDAGFinishedRecoveryError"); Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5); dag.addVertex(vA); diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestRecoveryParser.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestRecoveryParser.java index 6673b39e5b..57148ee7e9 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestRecoveryParser.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestRecoveryParser.java @@ -20,12 +20,15 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; +import com.google.common.collect.Sets; +import com.google.protobuf.CodedInputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -36,6 +39,8 @@ import org.apache.hadoop.yarn.util.SystemClock; import org.apache.log4j.Level; import org.apache.log4j.LogManager; +import org.apache.tez.dag.api.TaskLocationHint; +import org.apache.tez.dag.api.VertexLocationHint; import org.apache.tez.dag.api.oldrecords.TaskAttemptState; import org.apache.tez.dag.api.oldrecords.TaskState; import org.apache.tez.dag.api.records.DAGProtos.DAGPlan; @@ -59,6 +64,7 @@ import org.apache.tez.dag.history.events.TaskFinishedEvent; import org.apache.tez.dag.history.events.TaskStartedEvent; import org.apache.tez.dag.history.events.VertexCommitStartedEvent; +import org.apache.tez.dag.history.events.VertexConfigurationDoneEvent; import org.apache.tez.dag.history.events.VertexFinishedEvent; import org.apache.tez.dag.history.events.VertexGroupCommitFinishedEvent; import org.apache.tez.dag.history.events.VertexGroupCommitStartedEvent; @@ -91,6 +97,8 @@ public class TestRecoveryParser { private Path recoveryPath; private DAGAppMaster mockAppMaster; private DAGImpl mockDAGImpl; + // Protobuf message limit is 64 MB by default + private static final int PROTOBUF_DEFAULT_SIZE_LIMIT = 64 << 20; @Before public void setUp() throws IllegalArgumentException, IOException { @@ -103,9 +111,8 @@ public void setUp() throws IllegalArgumentException, IOException { mockAppMaster.dagIDs = new HashSet&lt;&gt;(); when(mockAppMaster.getConfig()).thenReturn(new Configuration()); mockDAGImpl = mock(DAGImpl.class); - when(mockAppMaster.createDAG(any(DAGPlan.class), any(TezDAGID.class))).thenReturn(mockDAGImpl); + when(mockAppMaster.createDAG(any(), any())).thenReturn(mockDAGImpl); parser = new RecoveryParser(mockAppMaster, localFS, recoveryPath, 3); - LogManager.getRootLogger().setLevel(Level.DEBUG); } private DAGSummaryData createDAGSummaryData(TezDAGID dagId, boolean completed) { @@ -191,7 +198,7 @@ public void testSkipAllOtherEvents_1() throws IOException { assertTrue(dagData.reason.contains("DAG Commit was in progress, not recoverable,")); // DAGSubmittedEvent is handled but DAGInitializedEvent and DAGStartedEvent in the next attempt are both skipped // because the DAG is not recoverable.
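// Illustrative sketch, not part of this patch: the matcher changes in these test files
// track the Mockito 2+ migration, where org.mockito.Matchers is deprecated in favor of
// org.mockito.ArgumentMatchers (re-exported through org.mockito.Mockito), and the untyped
// any() matches any argument, including null, which the typed any(Class) no longer does:
//
//   import static org.mockito.Mockito.any;  // replaces org.mockito.Matchers.any
//   // Mockito 1.x style (deprecated): verify(mockAppMaster).createDAG(any(DAGPlan.class), any(TezDAGID.class));
//   // Mockito 2.x style used by this patch: verify(mockAppMaster).createDAG(any(), any());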
- verify(mockAppMaster).createDAG(any(DAGPlan.class),any(TezDAGID.class)); + verify(mockAppMaster).createDAG(any(), any()); assertNull(dagData.getDAGInitializedEvent()); assertNull(dagData.getDAGStartedEvent()); } @@ -238,7 +245,7 @@ public void testSkipAllOtherEvents_2() throws IOException { assertEquals(DAGState.FAILED, dagData.dagState); assertEquals(true, dagData.isCompleted); // DAGSubmittedEvent, DAGInitializedEvent and DAGFinishedEvent are handled - verify(mockAppMaster).createDAG(any(DAGPlan.class),any(TezDAGID.class)); + verify(mockAppMaster).createDAG(any(), any()); // DAGInitializedEvent may not have been handled before DAGFinishedEvent, // because DAGFinishedEvent's writeToRecoveryImmediately is true assertNotNull(dagData.getDAGFinishedEvent()); @@ -267,7 +274,7 @@ public void testLastCorruptedRecoveryRecord() throws IOException { null, "user", new Configuration(), null, null))); // wait until DAGSubmittedEvent is handled in the RecoveryEventHandling thread rService.await(); - rService.outputStreamMap.get(dagID).writeUTF("INVALID_DATA"); + rService.outputStreamMap.get(dagID).write("INVALID_DATA".getBytes("UTF-8")); rService.stop(); // write data in attempt_2 @@ -278,7 +285,7 @@ rService.handle(new DAGHistoryEvent(dagID, new DAGInitializedEvent(dagID, 1L, "user", dagPlan.getName(), null))); rService.await(); - rService.outputStreamMap.get(dagID).writeUTF("INVALID_DATA"); + rService.outputStreamMap.get(dagID).write("INVALID_DATA".getBytes("UTF-8")); rService.stop(); // corrupted last records will be skipped but the recovery logs will still be read in full @@ -287,7 +294,7 @@ assertEquals(null, dagData.reason); assertEquals(false, dagData.nonRecoverable); // verify DAGSubmittedEvent & DAGInitializedEvent are handled.
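// Hedged sketch, not part of this patch: testRecoveryLargeEventData below builds a recovery
// event larger than PROTOBUF_DEFAULT_SIZE_LIMIT (64 << 20 = 67,108,864 bytes, i.e. 64 MB),
// the default protobuf parse limit that constant documents. With the CodedInputStream import
// added above, a protobuf-java reader could lift that limit roughly as follows; the stream
// and message type here are hypothetical:
//
//   CodedInputStream codedIn = CodedInputStream.newInstance(recoveryIn); // recoveryIn: hypothetical InputStream
//   codedIn.setSizeLimit(Integer.MAX_VALUE); // raise the 64 MB default
//   VertexConfigurationDoneProto.parseFrom(codedIn); // hypothetical generated message type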
- verify(mockAppMaster).createDAG(any(DAGPlan.class),any(TezDAGID.class)); + verify(mockAppMaster).createDAG(any(), any()); assertNotNull(dagData.getDAGInitializedEvent()); } @@ -618,6 +625,75 @@ public void testRecoverableNonSummary2() throws IOException { + ", but its full recovery events are not seen")); } + @Test(timeout=20000) + public void testRecoveryLargeEventData() throws IOException { + ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1); + TezDAGID dagID = TezDAGID.getInstance(appId, 1); + AppContext appContext = mock(AppContext.class); + when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath+"/1")); + when(appContext.getClock()).thenReturn(new SystemClock()); + when(mockDAGImpl.getID()).thenReturn(dagID); + when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim()); + when(appContext.getApplicationID()).thenReturn(appId); + + RecoveryService rService = new RecoveryService(appContext); + Configuration conf = new Configuration(); + conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true); + rService.init(conf); + rService.start(); + + DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan(); + // DAG DAGSubmittedEvent -> DAGInitializedEvent -> DAGStartedEvent + rService.handle(new DAGHistoryEvent(dagID, + new DAGSubmittedEvent(dagID, 1L, dagPlan, ApplicationAttemptId.newInstance(appId, 1), + null, "user", new Configuration(), null, null))); + DAGInitializedEvent dagInitedEvent = new DAGInitializedEvent(dagID, 100L, + "user", "dagName", null); + DAGStartedEvent dagStartedEvent = new DAGStartedEvent(dagID, 0L, "user", "dagName"); + rService.handle(new DAGHistoryEvent(dagID, dagInitedEvent)); + rService.handle(new DAGHistoryEvent(dagID, dagStartedEvent)); + + // Create a Recovery event larger than 64 MB to verify default max protobuf size + ArrayList taskLocationHints = new ArrayList<>(100000); + TaskLocationHint taskLocationHint = TaskLocationHint.createTaskLocationHint( + Sets.newHashSet("aaaaaaaaaaaaaaa.aaaaaaaaaaaaaaa.aaaaaaaaaaaaaaa", + "bbbbbbbbbbbbbbb.bbbbbbbbbbbbbbb.bbbbbbbbbbbbbbb", + "ccccccccccccccc.ccccccccccccccc.ccccccccccccccc", + "ddddddddddddddd.ddddddddddddddd.ddddddddddddddd", + "eeeeeeeeeeeeeee.eeeeeeeeeeeeeee.eeeeeeeeeeeeeee", + "fffffffffffffff.fffffffffffffff.fffffffffffffff", + "ggggggggggggggg.ggggggggggggggg.ggggggggggggggg", + "hhhhhhhhhhhhhhh.hhhhhhhhhhhhhhh.hhhhhhhhhhhhhhh", + "iiiiiiiiiiiiiii.iiiiiiiiiiiiiii.iiiiiiiiiiiiiii", + "jjjjjjjjjjjjjjj.jjjjjjjjjjjjjjj.jjjjjjjjjjjjjjj", + "kkkkkkkkkkkkkkk.kkkkkkkkkkkkkkk.kkkkkkkkkkkkkkk", + "lllllllllllllll.lllllllllllllll.lllllllllllllll", + "mmmmmmmmmmmmmmm.mmmmmmmmmmmmmmm.mmmmmmmmmmmmmmm", + "nnnnnnnnnnnnnnn.nnnnnnnnnnnnnnn.nnnnnnnnnnnnnnn"), + Sets.newHashSet("rack1", "rack2", "rack3")); + for (int i = 0; i < 100000; i++) { + taskLocationHints.add(taskLocationHint); + } + + TezVertexID v0Id = TezVertexID.getInstance(dagID, 0); + VertexLocationHint vertexLocationHint = VertexLocationHint.create(taskLocationHints); + VertexConfigurationDoneEvent vertexConfigurationDoneEvent = new VertexConfigurationDoneEvent( + v0Id, 0, 100000, vertexLocationHint, null, null, false); + // Verify large protobuf message + assertTrue(vertexConfigurationDoneEvent.toProto().getSerializedSize() > PROTOBUF_DEFAULT_SIZE_LIMIT ); + rService.handle(new DAGHistoryEvent(dagID, vertexConfigurationDoneEvent)); + rService.stop(); + + DAGRecoveryData dagData = parser.parseRecoveryData(); + VertexRecoveryData v0data = dagData.getVertexRecoveryData(v0Id); + 
assertNotNull("Vertex Recovery Data should be non-null", v0data); + VertexConfigurationDoneEvent parsedVertexConfigurationDoneEvent = v0data.getVertexConfigurationDoneEvent(); + assertNotNull("Vertex Configuration Done Event should be non-null", parsedVertexConfigurationDoneEvent); + VertexLocationHint parsedVertexLocationHint = parsedVertexConfigurationDoneEvent.getVertexLocationHint(); + assertNotNull("Vertex Location Hint should be non-null", parsedVertexLocationHint); + assertEquals(parsedVertexLocationHint.getTaskLocationHints().size(), 100000); + } + @Test(timeout=5000) public void testRecoveryData() throws IOException { ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1); diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestSpeculation.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestSpeculation.java index 9a39facbe0..bc5c7ea4da 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestSpeculation.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestSpeculation.java @@ -19,8 +19,15 @@ package org.apache.tez.dag.app; import java.io.IOException; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -42,39 +49,205 @@ import org.apache.tez.dag.app.dag.Task; import org.apache.tez.dag.app.dag.TaskAttempt; import org.apache.tez.dag.app.dag.impl.DAGImpl; +import org.apache.tez.dag.app.dag.speculation.legacy.LegacySpeculator; +import org.apache.tez.dag.app.dag.speculation.legacy.LegacyTaskRuntimeEstimator; +import org.apache.tez.dag.app.dag.speculation.legacy.SimpleExponentialTaskRuntimeEstimator; +import org.apache.tez.dag.app.dag.speculation.legacy.TaskRuntimeEstimator; import org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager; import org.apache.tez.dag.records.TaskAttemptTerminationCause; import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.dag.records.TezTaskID; import org.apache.tez.dag.records.TezVertexID; +import org.junit.After; import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; import org.junit.Test; import com.google.common.base.Joiner; +import org.junit.rules.TestRule; +import org.junit.runner.Description; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.model.Statement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +/** + * test speculation behavior given the list of estimator classes. + */ +@RunWith(Parameterized.class) public class TestSpeculation { - static Configuration defaultConf; - static FileSystem localFs; - + private final static Logger LOG = LoggerFactory.getLogger(TezConfiguration.class); + + private static final String ASSERT_SPECULATIONS_COUNT_MSG = + "Number of attempts after Speculation should be two"; + private static final String UNIT_EXCEPTION_MESSAGE = + "test timed out after"; + + /** + * {@link MockDAGAppMaster#launcherSleepTime} advances tasks every 1 millisecond. + * We want our test task to take at least slightly more than 1 second. This is because + * MockDAGAppMaster's mock clock advances clock 1 second at each tick. If we are unlucky + * this may cause speculator to wait 1 second between each evaluation. 
If we are really + * unlucky, our test tasks finish before speculator has a chance to evaluate and speculate + * them. That is why we want the tasks to take at least one second. + */ + private static final int NUM_UPDATES_FOR_TEST_TASK = 1200; + private static final int ASSERT_SPECULATIONS_COUNT_RETRIES = 3; + private Configuration defaultConf; + private FileSystem localFs; + + /** + * The Mock app. + */ MockDAGAppMaster mockApp; + + /** + * The Mock launcher. + */ MockContainerLauncher mockLauncher; - - static { + + /** + * The interface Retry. + */ + @Retention(RetentionPolicy.RUNTIME) + public @interface Retry {} + + /** + * The type Retry rule. + */ + class RetryRule implements TestRule { + + private AtomicInteger retryCount; + + /** + * Instantiates a new Retry rule. + * + * @param retries the retries + */ + RetryRule(int retries) { + super(); + this.retryCount = new AtomicInteger(retries); + } + + @Override + public Statement apply(final Statement base, + final Description description) { + return new Statement() { + @Override + public void evaluate() throws Throwable { + Throwable caughtThrowable = null; + + while (retryCount.getAndDecrement() > 0) { + try { + base.evaluate(); + return; + } catch (Throwable t) { + caughtThrowable = t; + if (retryCount.get() > 0 && + description.getAnnotation(Retry.class) != null) { + if (!((t instanceof AssertionError && t.getMessage() + .contains(ASSERT_SPECULATIONS_COUNT_MSG)) + || (t instanceof Exception && t.getMessage() + .contains(UNIT_EXCEPTION_MESSAGE)))) { + throw caughtThrowable; + } + LOG.warn("{} : Failed. Retries remaining: {}", description.getDisplayName(), retryCount.toString()); + } else { + throw caughtThrowable; + } + } + } + } + }; + } + } + + /** + * The Rule. + */ + @Rule + public RetryRule rule = new RetryRule(ASSERT_SPECULATIONS_COUNT_RETRIES); + + /** + * Sets default conf. 
+ */ + @Before + public void setDefaultConf() { try { defaultConf = new Configuration(false); defaultConf.set("fs.defaultFS", "file:///"); defaultConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); defaultConf.setBoolean(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, true); - defaultConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION, 1); - defaultConf.setFloat(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION, 1); + defaultConf.setFloat( + ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_SRC_FRACTION, 1); + defaultConf.setFloat( + ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MAX_SRC_FRACTION, 1); localFs = FileSystem.getLocal(defaultConf); - String stagingDir = "target" + Path.SEPARATOR + TestSpeculation.class.getName() + "-tmpDir"; + String stagingDir = + "target" + Path.SEPARATOR + TestSpeculation.class.getName() + + "-tmpDir"; defaultConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir); + defaultConf.setClass(TezConfiguration.TEZ_AM_TASK_ESTIMATOR_CLASS, + estimatorClass, + TaskRuntimeEstimator.class); + defaultConf.setInt(TezConfiguration.TEZ_AM_MINIMUM_ALLOWED_SPECULATIVE_TASKS, 20); + defaultConf.setDouble(TezConfiguration.TEZ_AM_PROPORTION_TOTAL_TASKS_SPECULATABLE, 0.2); + defaultConf.setDouble(TezConfiguration.TEZ_AM_PROPORTION_RUNNING_TASKS_SPECULATABLE, 0.25); + defaultConf.setLong(TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_NO_SPECULATE, 25); + defaultConf.setLong(TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_SPECULATE, 50); + defaultConf.setInt(TezConfiguration.TEZ_AM_ESTIMATOR_EXPONENTIAL_SKIP_INITIALS, 2); } catch (IOException e) { throw new RuntimeException("init failure", e); } } - + + /** + * Tear down. + */ + @After + public void tearDown() { + defaultConf = null; + try { + localFs.close(); + mockLauncher.shutdown(); + mockApp.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + /** + * Gets test parameters. + * + * @return the test parameters + */ + @Parameterized.Parameters(name = "{index}: TaskEstimator(EstimatorClass {0})") + public static Collection&lt;Object[]&gt; getTestParameters() { + return Arrays.asList(new Object[][]{ + {SimpleExponentialTaskRuntimeEstimator.class}, + {LegacyTaskRuntimeEstimator.class} + }); + } + + private Class&lt;? extends TaskRuntimeEstimator&gt; estimatorClass; + + /** + * Instantiates a new Test speculation. + * + * @param estimatorKlass the estimator klass + */ + public TestSpeculation(Class&lt;? extends TaskRuntimeEstimator&gt; estimatorKlass) { + this.estimatorClass = estimatorKlass; + } + + /** + * Create tez session mock tez client. + * + * @return the mock tez client + * @throws Exception the exception + */ MockTezClient createTezSession() throws Exception { TezConfiguration tezconf = new TezConfiguration(defaultConf); AtomicBoolean mockAppLauncherGoFlag = new AtomicBoolean(false); @@ -84,8 +257,16 @@ MockTezClient createTezSession() throws Exception { syncWithMockAppLauncher(false, mockAppLauncherGoFlag, tezClient); return tezClient; } - - void syncWithMockAppLauncher(boolean allowScheduling, AtomicBoolean mockAppLauncherGoFlag, + + /** + * Sync with mock app launcher.
+ * + * @param allowScheduling the allow scheduling + * @param mockAppLauncherGoFlag the mock app launcher go flag + * @param tezClient the tez client + * @throws Exception the exception + */ + void syncWithMockAppLauncher(boolean allowScheduling, AtomicBoolean mockAppLauncherGoFlag, MockTezClient tezClient) throws Exception { synchronized (mockAppLauncherGoFlag) { while (!mockAppLauncherGoFlag.get()) { @@ -97,31 +278,96 @@ void syncWithMockAppLauncher(boolean allowScheduling, AtomicBoolean mockAppLaunc mockAppLauncherGoFlag.notify(); } } - + + /** + * Test single task speculation. + * + * @throws Exception the exception + */ + @Retry + @Test (timeout = 30000) + public void testSingleTaskSpeculation() throws Exception { + // Map of configured single-task speculation timeout to expected attempt count + Map&lt;Long, Integer&gt; confToExpected = new HashMap&lt;&gt;(); + confToExpected.put(Long.MAX_VALUE >> 1, 1); // Really long time to speculate + confToExpected.put(100L, 2); + confToExpected.put(-1L, 1); // Don't speculate + defaultConf.setLong(TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_NO_SPECULATE, 50); + for (Map.Entry&lt;Long, Integer&gt; entry : confToExpected.entrySet()) { + defaultConf.setLong( + TezConfiguration.TEZ_AM_LEGACY_SPECULATIVE_SINGLE_TASK_VERTEX_TIMEOUT, + entry.getKey()); + + DAG dag = DAG.create("DAG-testSingleTaskSpeculation"); + Vertex vA = Vertex.create("A", + ProcessorDescriptor.create("Proc.class"), + 1); + dag.addVertex(vA); + + MockTezClient tezClient = createTezSession(); + + DAGClient dagClient = tezClient.submitDAG(dag); + DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG(); + TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0); + // original attempt is killed and speculative one is successful + TezTaskAttemptID killedTaId = + TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0); + TezTaskAttemptID successTaId = + TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1); + Thread.sleep(200); + // cause speculation trigger + mockLauncher.setStatusUpdatesForTask(killedTaId, NUM_UPDATES_FOR_TEST_TASK); + + mockLauncher.startScheduling(true); + dagClient.waitForCompletion(); + Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState()); + Task task = dagImpl.getTask(killedTaId.getTaskID()); + Assert.assertEquals(entry.getValue().intValue(), task.getAttempts().size()); + if (entry.getValue() > 1) { + Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getTaskAttemptID()); + TaskAttempt killedAttempt = task.getAttempt(killedTaId); + Assert.assertTrue(Joiner.on(",").join(killedAttempt.getDiagnostics()) + .contains("Killed as speculative attempt")); + Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_EFFECTIVE_SPECULATION, + killedAttempt.getTerminationCause()); + } + tezClient.stop(); + } + } + + /** + * Test basic speculation.
+ * + * @param withProgress the with progress + * @throws Exception the exception + */ public void testBasicSpeculation(boolean withProgress) throws Exception { - DAG dag = DAG.create("test"); - Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5); + DAG dag = DAG.create("DAG-testBasicSpeculation"); + Vertex vA = Vertex.create("A", + ProcessorDescriptor.create("Proc.class"), 5); dag.addVertex(vA); MockTezClient tezClient = createTezSession(); - DAGClient dagClient = tezClient.submitDAG(dag); DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG(); TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0); // original attempt is killed and speculative one is successful - TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0); - TezTaskAttemptID successTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1); + TezTaskAttemptID killedTaId = + TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0); + TezTaskAttemptID successTaId = + TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1); mockLauncher.updateProgress(withProgress); // cause speculation trigger - mockLauncher.setStatusUpdatesForTask(killedTaId, 100); + mockLauncher.setStatusUpdatesForTask(killedTaId, NUM_UPDATES_FOR_TEST_TASK); mockLauncher.startScheduling(true); dagClient.waitForCompletion(); - Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState()); + Assert.assertEquals(DAGStatus.State.SUCCEEDED, + dagClient.getDAGStatus(null).getState()); Task task = dagImpl.getTask(killedTaId.getTaskID()); - Assert.assertEquals(2, task.getAttempts().size()); - Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID()); + Assert.assertEquals(ASSERT_SPECULATIONS_COUNT_MSG, 2, + task.getAttempts().size()); + Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getTaskAttemptID()); TaskAttempt killedAttempt = task.getAttempt(killedTaId); Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed as speculative attempt"); Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_EFFECTIVE_SPECULATION, @@ -132,26 +378,53 @@ public void testBasicSpeculation(boolean withProgress) throws Exception { .getValue()); Assert.assertEquals(1, dagImpl.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue()); - org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getTaskID().getVertexID()); + org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getVertexID()); Assert.assertEquals(1, v.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue()); } + + LegacySpeculator speculator = + (LegacySpeculator)(dagImpl.getVertex(vA.getName())).getSpeculator(); + Assert.assertEquals(20, speculator.getMinimumAllowedSpeculativeTasks()); + Assert.assertEquals(.2, speculator.getProportionTotalTasksSpeculatable(), 0); + Assert.assertEquals(.25, speculator.getProportionRunningTasksSpeculatable(), 0); + Assert.assertEquals(25, speculator.getSoonestRetryAfterNoSpeculate()); + Assert.assertEquals(50, speculator.getSoonestRetryAfterSpeculate()); + tezClient.stop(); } - - @Test (timeout=10000) + + /** + * Test basic speculation with progress. + * + * @throws Exception the exception + */ + @Retry + @Test (timeout=30000) public void testBasicSpeculationWithProgress() throws Exception { testBasicSpeculation(true); } - @Test (timeout=10000) + /** + * Test basic speculation without progress. 
+ * + * @throws Exception the exception + */ + @Retry + @Test (timeout=30000) public void testBasicSpeculationWithoutProgress() throws Exception { testBasicSpeculation(false); } - - @Test (timeout=10000) + + /** + * Test basic speculation per vertex conf. + * + * @throws Exception the exception + */ + @Retry + @Test (timeout=30000) public void testBasicSpeculationPerVertexConf() throws Exception { - DAG dag = DAG.create("test"); + DAG dag = DAG.create("DAG-testBasicSpeculationPerVertexConf"); String vNameNoSpec = "A"; String vNameSpec = "B"; Vertex vA = Vertex.create(vNameNoSpec, ProcessorDescriptor.create("Proc.class"), 5); @@ -167,38 +440,52 @@ public void testBasicSpeculationPerVertexConf() throws Exception { InputDescriptor.create("I")))); MockTezClient tezClient = createTezSession(); - + DAGClient dagClient = tezClient.submitDAG(dag); DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG(); - TezVertexID vertexId = dagImpl.getVertex(vNameSpec).getVertexId(); + TezVertexID vertexIdSpec = dagImpl.getVertex(vNameSpec).getVertexId(); TezVertexID vertexIdNoSpec = dagImpl.getVertex(vNameNoSpec).getVertexId(); // original attempt is killed and speculative one is successful - TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), - 0); - TezTaskAttemptID noSpecTaId = TezTaskAttemptID + TezTaskAttemptID killedTaId = + TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexIdSpec, 0), 0); + TezTaskAttemptID successfulTaId = TezTaskAttemptID .getInstance(TezTaskID.getInstance(vertexIdNoSpec, 0), 0); // cause speculation trigger for both - mockLauncher.setStatusUpdatesForTask(killedTaId, 100); - mockLauncher.setStatusUpdatesForTask(noSpecTaId, 100); + mockLauncher.setStatusUpdatesForTask(killedTaId, NUM_UPDATES_FOR_TEST_TASK); + mockLauncher.setStatusUpdatesForTask(successfulTaId, NUM_UPDATES_FOR_TEST_TASK); mockLauncher.startScheduling(true); - dagClient.waitForCompletion(); - Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState()); - org.apache.tez.dag.app.dag.Vertex vSpec = dagImpl.getVertex(vertexId); + org.apache.tez.dag.app.dag.Vertex vSpec = dagImpl.getVertex(vertexIdSpec); org.apache.tez.dag.app.dag.Vertex vNoSpec = dagImpl.getVertex(vertexIdNoSpec); + // Wait enough time to give chance for the speculator to trigger + // speculation on VB. + // This would fail because of JUnit time out. + do { + Thread.sleep(100); + } while (vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) + .getValue() <= 0); + dagClient.waitForCompletion(); // speculation for vA but not for vB - Assert.assertTrue(vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) - .getValue() > 0); - Assert.assertEquals(0, vNoSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) - .getValue()); + Assert.assertTrue("Num Speculations is not higher than 0", + vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) + .getValue() > 0); + Assert.assertEquals(0, + vNoSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) + .getValue()); tezClient.stop(); } - @Test (timeout=10000) + /** + * Test basic speculation not useful. 
+ * + * @throws Exception the exception + */ + @Retry + @Test (timeout=30000) public void testBasicSpeculationNotUseful() throws Exception { - DAG dag = DAG.create("test"); + DAG dag = DAG.create("DAG-testBasicSpeculationNotUseful"); Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5); dag.addVertex(vA); @@ -211,15 +498,15 @@ public void testBasicSpeculationNotUseful() throws Exception { TezTaskAttemptID successTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0); TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1); - mockLauncher.setStatusUpdatesForTask(successTaId, 100); - mockLauncher.setStatusUpdatesForTask(killedTaId, 100); + mockLauncher.setStatusUpdatesForTask(successTaId, NUM_UPDATES_FOR_TEST_TASK); + mockLauncher.setStatusUpdatesForTask(killedTaId, NUM_UPDATES_FOR_TEST_TASK); mockLauncher.startScheduling(true); dagClient.waitForCompletion(); Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState()); Task task = dagImpl.getTask(killedTaId.getTaskID()); Assert.assertEquals(2, task.getAttempts().size()); - Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID()); + Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getTaskAttemptID()); TaskAttempt killedAttempt = task.getAttempt(killedTaId); Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed speculative attempt as"); Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_INEFFECTIVE_SPECULATION, @@ -228,10 +515,9 @@ public void testBasicSpeculationNotUseful() throws Exception { .getValue()); Assert.assertEquals(1, dagImpl.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue()); - org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getTaskID().getVertexID()); + org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getVertexID()); Assert.assertEquals(1, v.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS) .getValue()); tezClient.stop(); } - } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorContextImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorContextImpl.java index 9f9150f758..e73ccf02fe 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorContextImpl.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorContextImpl.java @@ -28,7 +28,9 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.tez.common.ContainerSignatureMatcher; import org.apache.tez.serviceplugins.api.TaskCommunicatorContext; +import org.apache.tez.dag.app.dag.DAG; import org.apache.tez.dag.app.rm.container.AMContainerMap; +import org.junit.Assert; import org.junit.Test; public class TestTaskCommunicatorContextImpl { @@ -83,6 +85,30 @@ public void testIsKnownContainer() { taskCommContext1.containerAlive(containerId01); verify(tal, never()).containerAlive(containerId01); reset(tal); + } + + @Test + public void testTaskCommContextReachesDAGConf() { + Configuration conf = new Configuration(); + conf.set("dagkey", "dagvalue"); + + DAG dag = mock(DAG.class); + when(dag.getConf()).thenReturn(conf); + + // TaskCommunicatorContextImpl.dag is present + TaskCommunicatorContextImpl commContext = new TaskCommunicatorContextImpl(null, null, null, 0); + commContext.dag = dag; + + Assert.assertEquals("DAG config should be exposed via context.dag.getConf()", + commContext.getCurrentDagInfo().getConf().get("dagkey"), "dagvalue"); + + // 
TaskCommunicatorContextImpl.appContext.getCurrentDAG() is present + AppContext appContext = mock(AppContext.class); + when(appContext.getCurrentDAG()).thenReturn(dag); + commContext = new TaskCommunicatorContextImpl(appContext, null, null, 0); + Assert.assertEquals( + "DAG config should be exposed via context.appContext.getCurrentDAG().getConf()", + commContext.getCurrentDagInfo().getConf().get("dagkey"), "dagvalue"); } } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager.java index c7f97d3ea9..3935789860 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager.java @@ -18,10 +18,10 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.eq; import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.spy; @@ -187,7 +187,7 @@ public void testEventRouting() throws Exception { AppContext appContext = mock(AppContext.class, RETURNS_DEEP_STUBS); NodeId nodeId = NodeId.newInstance("host1", 3131); - when(appContext.getAllContainers().get(any(ContainerId.class)).getContainer().getNodeId()) + when(appContext.getAllContainers().get(any()).getContainer().getNodeId()) .thenReturn(nodeId); TaskHeartbeatHandler thh = mock(TaskHeartbeatHandler.class); ContainerHeartbeatHandler chh = mock(ContainerHeartbeatHandler.class); @@ -348,7 +348,7 @@ public void testTaskCommunicatorUserError() { assertTrue(event.getDiagnosticInfo().contains(expectedId)); - when(appContext.getAllContainers().get(any(ContainerId.class)).getContainer().getNodeId()) + when(appContext.getAllContainers().get(any()).getContainer().getNodeId()) .thenReturn(mock(NodeId.class)); taskCommunicatorManager.registerRunningContainer(mock(ContainerId.class), 0); diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager1.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager1.java index 0f8afaafe6..d7e62ee530 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager1.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager1.java @@ -19,7 +19,7 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -144,7 +144,7 @@ public void setUp() throws TezException { AMContainer amContainer = mock(AMContainer.class); Container container = mock(Container.class); doReturn(nodeId).when(container).getNodeId(); - doReturn(amContainer).when(amContainerMap).get(any(ContainerId.class)); + doReturn(amContainer).when(amContainerMap).get(any()); doReturn(container).when(amContainer).getContainer(); Configuration conf = new TezConfiguration(); diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager2.java 
b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager2.java index bb7e94b5c7..ac60001a46 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager2.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestTaskCommunicatorManager2.java @@ -16,10 +16,10 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.times; @@ -256,7 +256,7 @@ private static class TaskCommunicatorManagerWrapperForTest { doReturn(dag).when(appContext).getCurrentDAG(); doReturn(vertex).when(dag).getVertex(eq(vertexId)); doReturn(new TaskAttemptEventInfo(0, new LinkedList(), 0)).when(vertex) - .getTaskAttemptTezEvents(any(TezTaskAttemptID.class), anyInt(), anyInt(), anyInt()); + .getTaskAttemptTezEvents(any(), anyInt(), anyInt(), anyInt()); doReturn(appAttemptId).when(appContext).getApplicationAttemptId(); doReturn(credentials).when(appContext).getAppCredentials(); doReturn(appAcls).when(appContext).getApplicationACLs(); @@ -267,7 +267,7 @@ private static class TaskCommunicatorManagerWrapperForTest { AMContainer amContainer = mock(AMContainer.class); Container container = mock(Container.class); doReturn(nodeId).when(container).getNodeId(); - doReturn(amContainer).when(amContainerMap).get(any(ContainerId.class)); + doReturn(amContainer).when(amContainerMap).get(any()); doReturn(container).when(amContainer).getContainer(); userPayload = TezUtils.createUserPayloadFromConf(conf); diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/TestRootInputInitializerManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/TestRootInputInitializerManager.java index b79b4afe68..2c5aa337b7 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/TestRootInputInitializerManager.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/TestRootInputInitializerManager.java @@ -15,8 +15,8 @@ package org.apache.tez.dag.app.dag; import static org.junit.Assert.assertEquals; -import static org.mockito.Matchers.any; import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; @@ -101,7 +101,7 @@ public void testEventBeforeSuccess() throws Exception { eventList.add(te1); initializerWrapper.handleInputInitializerEvents(eventList); - verify(initializer, never()).handleInputInitializerEvent(any(List.class)); + verify(initializer, never()).handleInputInitializerEvent(any()); eventList.clear(); // First attempt, Task success notification @@ -123,7 +123,7 @@ public void testEventBeforeSuccess() throws Exception { eventList.add(te2); initializerWrapper.handleInputInitializerEvents(eventList); - verify(initializer, never()).handleInputInitializerEvent(any(List.class)); + verify(initializer, never()).handleInputInitializerEvent(any()); eventList.clear(); reset(initializer); @@ -176,7 +176,7 @@ public void testSuccessBeforeEvent() throws Exception { eventList.add(te1); initializerWrapper.handleInputInitializerEvents(eventList); - verify(initializer, 
never()).handleInputInitializerEvent(any(List.class)); + verify(initializer, never()).handleInputInitializerEvent(any()); eventList.clear(); // First attempt, Task success notification @@ -192,7 +192,7 @@ public void testSuccessBeforeEvent() throws Exception { TezTaskAttemptID srcTaskAttemptId12 = TezTaskAttemptID.getInstance(srcTaskId1, 2); // 2nd attempt succeeded initializerWrapper.onTaskSucceeded(srcVertexName, srcTaskId1, srcTaskAttemptId12.getId()); - verify(initializer, never()).handleInputInitializerEvent(any(List.class)); + verify(initializer, never()).handleInputInitializerEvent(any()); // 2nd attempt send event EventMetaData sourceInfo12 = @@ -203,7 +203,7 @@ public void testSuccessBeforeEvent() throws Exception { eventList.add(te2); initializerWrapper.handleInputInitializerEvents(eventList); - verify(initializer, never()).handleInputInitializerEvent(any(List.class)); + verify(initializer, never()).handleInputInitializerEvent(any()); } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/TestStateChangeNotifier.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/TestStateChangeNotifier.java index d20903d348..b43782e165 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/TestStateChangeNotifier.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/TestStateChangeNotifier.java @@ -20,7 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; @@ -139,14 +139,14 @@ public void testEventsOnRegistration() { verify(mockListener13, times(1)).onStateUpdated(argumentCaptor.capture()); assertEquals(VertexState.RUNNING, argumentCaptor.getValue().getVertexState()); - verify(mockListener14, never()).onStateUpdated(any(VertexStateUpdate.class)); + verify(mockListener14, never()).onStateUpdated(any()); // Vertex has not notified of state tracker.reset(); VertexStateUpdateListener mockListener2 = mock(VertexStateUpdateListener.class); tracker.registerForVertexUpdates(v2.getName(), null, mockListener2); Assert.assertEquals(0, tracker.totalCount.get()); // there should no be any event sent out - verify(mockListener2, never()).onStateUpdated(any(VertexStateUpdate.class)); + verify(mockListener2, never()).onStateUpdated(any()); // Vertex has notified about parallelism update only tracker.stateChanged(v3.getVertexId(), new VertexStateUpdateParallelismUpdated(v3.getName(), 23, -1)); diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java index 7611f1c44e..f4d2daaacb 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java @@ -167,7 +167,7 @@ private class VertexEventDispatcher implements EventHandler { @SuppressWarnings("unchecked") @Override public void handle(VertexEvent event) { - Vertex vertex = dag.getVertex(event.getVertexId()); + Vertex vertex = dag.getVertex(event.getVertexID()); ((EventHandler) vertex).handle(event); } } @@ -176,7 +176,7 @@ private class TaskEventDispatcher implements EventHandler { @SuppressWarnings("unchecked") @Override public void handle(TaskEvent event) { - Vertex vertex = dag.getVertex(event.getTaskID().getVertexID()); + Vertex vertex = dag.getVertex(event.getVertexID()); Task task = 
vertex.getTask(event.getTaskID()); ((EventHandler) task).handle(event); } @@ -400,7 +400,7 @@ private void waitForCommitCompleted(DAGImpl vertex, OutputKey outputKey) { // v2->v3 // vertex_group (v1, v2) private DAGPlan createDAGPlan(boolean vertexGroupCommitSucceeded, - boolean v3CommitSucceeded) throws Exception { + boolean v3CommitSucceeded, String dagName) throws Exception { LOG.info("Setting up group dag plan"); int dummyTaskCount = 1; Resource dummyTaskResource = Resource.newInstance(1, 1); @@ -414,7 +414,7 @@ private DAGPlan createDAGPlan(boolean vertexGroupCommitSucceeded, "vertex3", ProcessorDescriptor.create("Processor"), dummyTaskCount, dummyTaskResource); - DAG dag = DAG.create("testDag"); + DAG testDag = DAG.create("DAG-" + dagName); String groupName1 = "uv12"; OutputCommitterDescriptor ocd1 = OutputCommitterDescriptor.create( CountingOutputCommitter.class.getName()).setUserPayload( @@ -427,8 +427,8 @@ private DAGPlan createDAGPlan(boolean vertexGroupCommitSucceeded, .wrap(new CountingOutputCommitter.CountingOutputCommitterConfig( !v3CommitSucceeded, true).toUserPayload()))); - org.apache.tez.dag.api.VertexGroup uv12 = dag.createVertexGroup(groupName1, - v1, v2); + org.apache.tez.dag.api.VertexGroup uv12 = + testDag.createVertexGroup(groupName1, v1, v2); OutputDescriptor outDesc = OutputDescriptor.create("output.class"); uv12.addDataSink("v12Out", DataSinkDescriptor.create(outDesc, ocd1, null)); v3.addDataSink("v3Out", DataSinkDescriptor.create(outDesc, ocd2, null)); @@ -440,18 +440,19 @@ private DAGPlan createDAGPlan(boolean vertexGroupCommitSucceeded, InputDescriptor.create("dummy input class")), InputDescriptor .create("merge.class")); - dag.addVertex(v1); - dag.addVertex(v2); - dag.addVertex(v3); - dag.addEdge(e1); - return dag.createDag(conf, null, null, null, true); + testDag.addVertex(v1); + testDag.addVertex(v2); + testDag.addVertex(v3); + testDag.addEdge(e1); + return testDag.createDag(conf, null, null, null, true); } // v1->v3 // v2->v3 // vertex_group (v1, v2) has 2 shared outputs - private DAGPlan createDAGPlanWith2VertexGroupOutputs(boolean vertexGroupCommitSucceeded1, - boolean vertexGroupCommitSucceeded2, boolean v3CommitSucceeded) throws Exception { + private DAGPlan createDAGPlanWith2VertexGroupOutputs( + boolean vertexGroupCommitSucceeded1, boolean vertexGroupCommitSucceeded2, + boolean v3CommitSucceeded, String dagName) throws Exception { LOG.info("Setting up group dag plan"); int dummyTaskCount = 1; Resource dummyTaskResource = Resource.newInstance(1, 1); @@ -465,7 +466,7 @@ private DAGPlan createDAGPlanWith2VertexGroupOutputs(boolean vertexGroupCommitSu "vertex3", ProcessorDescriptor.create("Processor"), dummyTaskCount, dummyTaskResource); - DAG dag = DAG.create("testDag"); + DAG testDag = DAG.create("DAG-" + dagName); String groupName1 = "uv12"; OutputCommitterDescriptor ocd1 = OutputCommitterDescriptor.create( CountingOutputCommitter.class.getName()).setUserPayload( @@ -483,8 +484,8 @@ private DAGPlan createDAGPlanWith2VertexGroupOutputs(boolean vertexGroupCommitSu .wrap(new CountingOutputCommitter.CountingOutputCommitterConfig( !v3CommitSucceeded, true).toUserPayload()))); - org.apache.tez.dag.api.VertexGroup uv12 = dag.createVertexGroup(groupName1, - v1, v2); + org.apache.tez.dag.api.VertexGroup uv12 = + testDag.createVertexGroup(groupName1, v1, v2); OutputDescriptor outDesc = OutputDescriptor.create("output.class"); uv12.addDataSink("v12Out1", DataSinkDescriptor.create(outDesc, ocd1, null)); uv12.addDataSink("v12Out2", 
DataSinkDescriptor.create(outDesc, ocd2, null));
@@ -497,21 +498,22 @@ private DAGPlan createDAGPlanWith2VertexGroupOutputs(boolean vertexGroupCommitSu
             InputDescriptor.create("dummy input class")), InputDescriptor
             .create("merge.class"));
-    dag.addVertex(v1);
-    dag.addVertex(v2);
-    dag.addVertex(v3);
-    dag.addEdge(e1);
-    return dag.createDag(conf, null, null, null, true);
+    testDag.addVertex(v1);
+    testDag.addVertex(v2);
+    testDag.addVertex(v3);
+    testDag.addEdge(e1);
+    return testDag.createDag(conf, null, null, null, true);
   }
 
   private DAGPlan createDAGPlan_SingleVertexWith2Committer(
-      boolean commit1Succeed, boolean commit2Succeed) throws IOException {
-    return createDAGPlan_SingleVertexWith2Committer(commit1Succeed, commit2Succeed, false);
+      boolean commit1Succeed, boolean commit2Succeed, String dagName) throws IOException {
+    return createDAGPlan_SingleVertexWith2Committer(commit1Succeed, commit2Succeed, false, dagName);
   }
 
   // used for route event error in VM
-  private DAGPlan createDAGPlan_SingleVertexWith2Committer
-      (boolean commit1Succeed, boolean commit2Succeed, boolean customVM) throws IOException {
+  private DAGPlan createDAGPlan_SingleVertexWith2Committer(
+      boolean commit1Succeed, boolean commit2Succeed, boolean customVM,
+      String dagName) throws IOException {
     LOG.info("Setting up group dag plan");
     int dummyTaskCount = 1;
     Resource dummyTaskResource = Resource.newInstance(1, 1);
@@ -534,12 +536,12 @@ private DAGPlan createDAGPlan_SingleVertexWith2Committer(
             .wrap(new CountingOutputCommitter.CountingOutputCommitterConfig(
                 !commit2Succeed, true).toUserPayload())));
 
-    DAG dag = DAG.create("testDag");
-    dag.addVertex(v1);
+    DAG testDag = DAG.create("DAG-" + dagName);
+    testDag.addVertex(v1);
     OutputDescriptor outDesc = OutputDescriptor.create("output.class");
     v1.addDataSink("v1Out_1", DataSinkDescriptor.create(outDesc, ocd1, null));
     v1.addDataSink("v1Out_2", DataSinkDescriptor.create(outDesc, ocd2, null));
-    return dag.createDag(conf, null, null, null, true);
+    return testDag.createDag(conf, null, null, null, true);
   }
 
   private void initDAG(DAGImpl dag) {
@@ -559,11 +561,12 @@ private void startDAG(DAGImpl impl) {
   public void testVertexCommit_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
-    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true));
+    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true,
+        "testVertexCommit_OnDAGSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertNull(v1.getTerminationCause());
@@ -590,11 +593,12 @@ public void testVertexCommit_OnDAGSuccess() throws Exception {
   public void testVertexCommit_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true));
+    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true,
+        "testVertexCommit_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.COMMITTING, v1.getState());
     CountingOutputCommitter v1OutputCommitter_1 = (CountingOutputCommitter) v1
@@ -629,11 +633,12 @@ public void testVertexCommit_OnVertexSuccess() throws Exception {
   public void testVertexCommitFail1_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan_SingleVertexWith2Committer(false, true));
+    setupDAG(createDAGPlan_SingleVertexWith2Committer(false, true,
+        "testVertexCommitFail1_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.COMMITTING, v1.getState());
     CountingOutputCommitter v1OutputCommitter_1 = (CountingOutputCommitter) v1
@@ -665,11 +670,12 @@ public void testVertexCommitFail1_OnVertexSuccess() throws Exception {
   public void testVertexCommitFail2_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, false));
+    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, false,
+        "testVertexCommitFail2_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.COMMITTING, v1.getState());
     CountingOutputCommitter v1OutputCommitter_1 = (CountingOutputCommitter) v1
@@ -703,11 +709,12 @@ public void testVertexCommitFail2_OnVertexSuccess() throws Exception {
   public void testVertexKilledWhileCommitting() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true));
+    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true,
+        "testVertexKilledWhileCommitting"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.COMMITTING, v1.getState());
     // kill dag which will trigger the vertex killed event
@@ -742,11 +749,12 @@ public void testVertexKilledWhileCommitting() throws Exception {
   public void testVertexRescheduleWhileCommitting() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true));
+    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true,
+        "testVertexRescheduleWhileCommitting"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.COMMITTING, v1.getState());
     // reschedule task
@@ -782,11 +790,12 @@ public void testVertexRescheduleWhileCommitting() throws Exception {
   public void testVertexRouteEventErrorWhileCommitting() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true, true));
+    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true, true,
+        "testVertexRouteEventErrorWhileCommitting"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.COMMITTING, v1.getState());
     // reschedule task
@@ -827,11 +836,12 @@ public void testVertexRouteEventErrorWhileCommitting() throws Exception {
   public void testVertexInternalErrorWhileCommiting() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true));
+    setupDAG(createDAGPlan_SingleVertexWith2Committer(true, true,
+        "testVertexInternalErrorWhileCommiting"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.COMMITTING, v1.getState());
     // internal error
@@ -868,18 +878,18 @@ public void testVertexInternalErrorWhileCommiting() throws Exception {
   public void testDAGCommitSucceeded_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "testDAGCommitSucceeded_OnDAGSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     waitUntil(dag, DAGState.COMMITTING);
     CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
@@ -924,18 +934,18 @@ public void testDAGCommitSucceeded_OnDAGSuccess() throws Exception {
   public void testDAGCommitFail1_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
-    setupDAG(createDAGPlan(true, false));
+    setupDAG(createDAGPlan(true, false, "testDAGCommitFail1_OnDAGSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     waitUntil(dag, DAGState.COMMITTING);
 
@@ -985,18 +995,18 @@ public void testDAGCommitFail1_OnDAGSuccess() throws Exception {
   public void testDAGCommitFail2_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
-    setupDAG(createDAGPlan(false, true));
+    setupDAG(createDAGPlan(false, true, "testDAGCommitFail2_OnDAGSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     waitUntil(dag, DAGState.COMMITTING);
 
@@ -1045,18 +1055,18 @@ public void testDAGCommitFail2_OnDAGSuccess() throws Exception {
   public void testDAGCommitSucceeded1_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "testDAGCommitSucceeded1_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
@@ -1103,18 +1113,18 @@ public void testDAGCommitSucceeded1_OnVertexSuccess() throws Exception {
   public void testDAGCommitSucceeded2_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "testDAGCommitSucceeded2_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
@@ -1163,18 +1173,18 @@ public void testDAGCommitSucceeded2_OnVertexSuccess() throws Exception {
   public void testDAGCommitSucceeded3_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlanWith2VertexGroupOutputs(true, true, true));
+    setupDAG(createDAGPlanWith2VertexGroupOutputs(true, true, true, "testDAGCommitSucceeded3_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
@@ -1228,18 +1238,18 @@ public void testDAGCommitSucceeded3_OnVertexSuccess() throws Exception {
   public void testDAGCommitFail1_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan(false, true));
+    setupDAG(createDAGPlan(false, true, "testDAGCommitFail1_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
@@ -1287,18 +1297,18 @@ public void testDAGCommitFail1_OnVertexSuccess() throws Exception {
   public void testDAGCommitFail2_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan(true, false));
+    setupDAG(createDAGPlan(true, false, "testDAGCommitFail2_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
@@ -1346,18 +1356,18 @@ public void testDAGCommitFail2_OnVertexSuccess() throws Exception {
   public void testDAGCommitFail3_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan(true, false));
+    setupDAG(createDAGPlan(true, false, "testDAGCommitFail3_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
@@ -1409,18 +1419,18 @@ public void testDAGCommitFail3_OnVertexSuccess() throws Exception {
   public void testDAGCommitFail4_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan(false, true));
+    setupDAG(createDAGPlan(false, true, "testDAGCommitFail4_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.SUCCEEDED, v1.getState());
     Assert.assertEquals(VertexState.SUCCEEDED, v2.getState());
@@ -1468,18 +1478,18 @@ public void testDAGCommitFail4_OnVertexSuccess() throws Exception {
   public void testDAGInternalErrorWhileCommiting_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "testDAGInternalErrorWhileCommiting_OnDAGSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     waitUntil(dag, DAGState.COMMITTING);
     dag.handle(new DAGEvent(dag.getID(), DAGEventType.INTERNAL_ERROR));
@@ -1530,18 +1540,18 @@ public void testServiceErrorWhileCommitting1_OnDAGSuccess() throws Exception {
   private void _testDAGTerminatedWhileCommitting1_OnDAGSuccess(DAGTerminationCause terminationCause) throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "_testDAGTerminatedWhileCommitting1_OnDAGSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     waitUntil(dag, DAGState.COMMITTING);
     dag.handle(new DAGEventTerminateDag(dag.getID(), terminationCause, null));
@@ -1595,18 +1605,18 @@ public void testServiceErrorWhileCommitting1_OnVertexSuccess() throws Exception
   private void _testDAGTerminatedWhileCommitting1_OnVertexSuccess(DAGTerminationCause terminationCause) throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "_testDAGTerminatedWhileCommitting1_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.COMMITTING, v3.getState());
     // dag is still in RUNNING because v3 has not completed
@@ -1665,18 +1675,18 @@ public void testServiceErrorWhileRunning_OnVertexSuccess() throws Exception {
   private void _testDAGKilledWhileRunning_OnVertexSuccess(DAGTerminationCause terminationCause) throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "_testDAGKilledWhileRunning_OnVertexSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     Assert.assertEquals(VertexState.COMMITTING, v3.getState());
     // dag is still in RUNNING because v3 has not completed
@@ -1724,18 +1734,18 @@ private void _testDAGKilledWhileRunning_OnVertexSuccess(DAGTerminationCause term
   public void testDAGCommitVertexRerunWhileCommitting_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "testDAGCommitVertexRerunWhileCommitting_OnDAGSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     waitUntil(dag, DAGState.COMMITTING);
     TezTaskID newTaskId = TezTaskID.getInstance(v1.getVertexId(), 1);
@@ -1783,18 +1793,18 @@ public void testDAGCommitVertexRerunWhileCommitting_OnDAGSuccess() throws Except
   public void testDAGCommitInternalErrorWhileCommiting_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "testDAGCommitInternalErrorWhileCommiting_OnDAGSuccess"));
     initDAG(dag);
     startDAG(dag);
     VertexImpl v1 = (VertexImpl) dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     waitUntil(dag, DAGState.COMMITTING);
     dag.handle(new DAGEvent(dag.getID(), DAGEventType.INTERNAL_ERROR));
@@ -1831,7 +1841,7 @@ public void testDAGCommitInternalErrorWhileCommiting_OnDAGSuccess() throws Excep
   public void testVertexGroupCommitFinishedEventFail_OnVertexSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         false);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "testVertexGroupCommitFinishedEventFail_OnVertexSuccess"));
     historyEventHandler.failVertexGroupCommitFinishedEvent = true;
 
     initDAG(dag);
@@ -1840,11 +1850,11 @@ public void testVertexGroupCommitFinishedEventFail_OnVertexSuccess() throws Exce
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
        TaskState.SUCCEEDED));
     CountingOutputCommitter v12OutputCommitter = (CountingOutputCommitter) v1
         .getOutputCommitter("v12Out");
@@ -1886,7 +1896,7 @@ public void testVertexGroupCommitFinishedEventFail_OnVertexSuccess() throws Exce
   public void testDAGCommitStartedEventFail_OnDAGSuccess() throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "testDAGCommitStartedEventFail_OnDAGSuccess"));
     historyEventHandler.failDAGCommitStartedEvent = true;
 
     initDAG(dag);
@@ -1895,11 +1905,11 @@ public void testDAGCommitStartedEventFail_OnDAGSuccess() throws Exception {
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     waitUntil(dag, DAGState.FAILED);
     Assert.assertEquals(DAGTerminationCause.RECOVERY_FAILURE, dag.getTerminationCause());
@@ -1947,7 +1957,7 @@ public void testCommitCanceled_OnDAGSuccess2() throws Exception {
   private void _testCommitCanceled_OnDAGSuccess(DAGTerminationCause terminationCause) throws Exception {
     conf.setBoolean(TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
         true);
-    setupDAG(createDAGPlan(true, true));
+    setupDAG(createDAGPlan(true, true, "_testCommitCanceled_OnDAGSuccess"));
     // create customized ThreadPoolExecutor to wait before schedule new task
     rawExecutor = new ControlledThreadPoolExecutor(1);
     execService = MoreExecutors.listeningDecorator(rawExecutor);
@@ -1959,11 +1969,11 @@ private void _testCommitCanceled_OnDAGSuccess(DAGTerminationCau
     VertexImpl v2 = (VertexImpl) dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl) dag.getVertex("vertex3");
     // need to make vertices to go to SUCCEEDED
-    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskId(),
+    v1.handle(new VertexEventTaskCompleted(v1.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskId(),
+    v2.handle(new VertexEventTaskCompleted(v2.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
-    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskId(),
+    v3.handle(new VertexEventTaskCompleted(v3.getTask(0).getTaskID(),
         TaskState.SUCCEEDED));
     waitUntil(dag, DAGState.COMMITTING);
     // mean the commits have been submitted to ThreadPool
@@ -2110,7 +2120,7 @@ public void verifyDAGFinishedEvent(TezDAGID dagId, int expectedTimes) {
     for (HistoryEvent event : historyEvents) {
       if (event.getEventType() == HistoryEventType.DAG_FINISHED) {
         DAGFinishedEvent startedEvent = (DAGFinishedEvent)event;
-        if (startedEvent.getDagID().equals(dagId)) {
+        if (startedEvent.getDAGID().equals(dagId)) {
          actualTimes ++;
        }
      }
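The recurring change in the file above threads an explicit dagName into every createDAGPlan* helper, so each test builds a DAG named "DAG-<testMethod>" instead of a shared "testDag". A minimal, self-contained sketch of the idea (hypothetical class; it assumes JUnit 4's TestName rule, the same mechanism the TestDAGImpl changes below adopt):

  import org.junit.Rule;
  import org.junit.Test;
  import org.junit.rules.TestName;

  public class DagNamingSketch {
    // TestName exposes the currently executing test method's name.
    @Rule
    public TestName testName = new TestName();

    // Stand-in for the createDAGPlan(...) helpers: prefixing the test name
    // makes each DAG name unique, so AM logs and history events can be
    // traced back to the exact test that produced them.
    private String uniqueDagName() {
      return "DAG-" + testName.getMethodName();
    }

    @Test
    public void testVertexCommit() {
      System.out.println(uniqueDagName()); // prints "DAG-testVertexCommit"
    }
  }

Unique names matter once many DAGs run through a single test JVM: a failure in one DAG's history stream is otherwise hard to attribute to its originating test.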
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
index 966b464539..46c4fe1cff 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java
@@ -18,10 +18,16 @@
 package org.apache.tez.dag.app.dag.impl;
 
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyInt;
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.anyBoolean;
+import static org.mockito.Mockito.anyInt;
+import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.doThrow;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
 import java.io.IOException;
@@ -39,18 +45,25 @@
 import org.apache.commons.lang.StringUtils;
 import org.apache.tez.common.DrainDispatcher;
+import org.apache.tez.common.counters.DAGCounter;
 import org.apache.tez.common.counters.Limits;
 import org.apache.tez.common.counters.TezCounters;
 import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag;
+import org.apache.tez.dag.app.rm.TaskSchedulerManager;
 import org.apache.tez.hadoop.shim.DefaultHadoopShim;
 import org.apache.tez.hadoop.shim.HadoopShim;
+import org.junit.Rule;
+import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.util.Clock;
@@ -111,6 +124,7 @@
 import org.apache.tez.dag.app.dag.event.DAGAppMasterEventType;
 import org.apache.tez.dag.app.dag.event.DAGEvent;
 import org.apache.tez.dag.app.dag.event.DAGEventStartDag;
+import org.apache.tez.dag.app.dag.event.DAGEventCommitCompleted;
 import org.apache.tez.dag.app.dag.event.DAGEventType;
 import org.apache.tez.dag.app.dag.event.DAGEventVertexCompleted;
 import org.apache.tez.dag.app.dag.event.DAGEventVertexReRunning;
@@ -140,13 +154,13 @@
 import org.apache.tez.runtime.api.impl.EventMetaData;
 import org.apache.tez.runtime.api.impl.EventMetaData.EventProducerConsumerType;
 import org.apache.tez.runtime.api.impl.TezEvent;
+import org.apache.tez.state.StateMachineTez;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Ignore;
 import org.junit.Test;
-import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
@@ -157,6 +171,9 @@
 public class TestDAGImpl {
 
+  @Rule
+  public TestName testName = new TestName();
+
   private static final Logger LOG = LoggerFactory.getLogger(TestDAGImpl.class);
   private DAGPlan dagPlan;
   private TezDAGID dagId;
@@ -168,6 +185,7 @@ public class TestDAGImpl {
   private ACLManager aclManager;
   private ApplicationAttemptId appAttemptId;
   private DAGImpl dag;
+  private TaskSchedulerManager taskSchedulerManager;
   private TaskEventDispatcher taskEventDispatcher;
   private VertexEventDispatcher vertexEventDispatcher;
   private DagEventDispatcher dagEventDispatcher;
@@ -218,7 +236,7 @@ private DAGImpl chooseDAG(TezDAGID curDAGId) {
   private class DagEventDispatcher implements EventHandler {
     @Override
     public void handle(DAGEvent event) {
-      DAGImpl dag = chooseDAG(event.getDAGId());
+      DAGImpl dag = chooseDAG(event.getDAGID());
       dag.handle(event);
     }
   }
@@ -227,9 +245,9 @@ private class TaskEventDispatcher implements EventHandler {
     @SuppressWarnings("unchecked")
     @Override
     public void handle(TaskEvent event) {
-      TezDAGID id = event.getTaskID().getVertexID().getDAGId();
+      TezDAGID id = event.getDAGID();
       DAGImpl handler = chooseDAG(id);
-      Vertex vertex = handler.getVertex(event.getTaskID().getVertexID());
+      Vertex vertex = handler.getVertex(event.getVertexID());
       Task task = vertex.getTask(event.getTaskID());
       ((EventHandler)task).handle(event);
     }
@@ -246,10 +264,10 @@ public void handle(TaskAttemptEvent event) {
   private class TaskAttemptEventDisptacher2 implements EventHandler {
     @Override
     public void handle(TaskAttemptEvent event) {
-      TezDAGID id = event.getTaskAttemptID().getTaskID().getVertexID().getDAGId();
+      TezDAGID id = event.getDAGID();
       DAGImpl handler = chooseDAG(id);
-      Vertex vertex = handler.getVertex(event.getTaskAttemptID().getTaskID().getVertexID());
-      Task task = vertex.getTask(event.getTaskAttemptID().getTaskID());
+      Vertex vertex = handler.getVertex(event.getVertexID());
+      Task task = vertex.getTask(event.getTaskID());
       TaskAttempt ta = task.getAttempt(event.getTaskAttemptID());
       ((EventHandler)ta).handle(event);
     }
@@ -261,9 +279,9 @@ private class VertexEventDispatcher
     @SuppressWarnings("unchecked")
     @Override
     public void handle(VertexEvent event) {
-      TezDAGID id = event.getVertexId().getDAGId();
+      TezDAGID id = event.getDAGID();
       DAGImpl handler = chooseDAG(id);
-      Vertex vertex = handler.getVertex(event.getVertexId());
+      Vertex vertex = handler.getVertex(event.getVertexID());
       ((EventHandler) vertex).handle(event);
     }
   }
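The dispatcher rewrites above capture the other recurring change in this patch: task, task-attempt, and vertex events now expose getDAGID(), getVertexID(), and getTaskID() directly, replacing chains such as event.getTaskAttemptID().getTaskID().getVertexID().getDAGId(). A small sketch of that accessor consolidation (hypothetical type, not the actual Tez event classes):

  // Hypothetical event illustrating why direct ID getters simplify callers.
  final class TaskAttemptEventSketch {
    private final String dagId;
    private final String vertexId;
    private final String taskId;
    private final String attemptId;

    TaskAttemptEventSketch(String dagId, String vertexId,
        String taskId, String attemptId) {
      this.dagId = dagId;
      this.vertexId = vertexId;
      this.taskId = taskId;
      this.attemptId = attemptId;
    }

    // Direct getters save every caller from walking
    // attempt -> task -> vertex -> DAG by hand.
    String getDAGID() { return dagId; }
    String getVertexID() { return vertexId; }
    String getTaskID() { return taskId; }
    String getTaskAttemptID() { return attemptId; }
  }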
@@ -422,7 +440,7 @@ public void commitOutput() throws IOException {
   }
 
   // Create a plan with 3 vertices: A, B, C. Group(A,B)->C
-  static DAGPlan createGroupDAGPlan() {
+  static DAGPlan createGroupDAGPlan(String dagName) {
     LOG.info("Setting up group dag plan");
     int dummyTaskCount = 1;
     Resource dummyTaskResource = Resource.newInstance(1, 1);
@@ -436,7 +454,7 @@ static DAGPlan createGroupDAGPlan() {
         ProcessorDescriptor.create("Processor"),
         dummyTaskCount, dummyTaskResource);
 
-    DAG dag = DAG.create("testDag");
+    DAG dag = DAG.create("DAG-" + dagName);
     String groupName1 = "uv12";
     OutputCommitterDescriptor ocd = OutputCommitterDescriptor.create(
         TotalCountingOutputCommitter.class.getName());
@@ -848,12 +866,13 @@ public void setup() {
     dispatcher = new DrainDispatcher();
     fsTokens = new Credentials();
     appContext = mock(AppContext.class);
+    taskSchedulerManager = mock(TaskSchedulerManager.class);
     execService = mock(ListeningExecutorService.class);
     final ListenableFuture mockFuture = mock(ListenableFuture.class);
     when(appContext.getHadoopShim()).thenReturn(defaultShim);
     when(appContext.getApplicationID()).thenReturn(appAttemptId.getApplicationId());
-
-    Mockito.doAnswer(new Answer() {
+
+    doAnswer(new Answer() {
       public ListenableFuture answer(InvocationOnMock invocation) {
         Object[] args = invocation.getArguments();
         CallableEvent e = (CallableEvent) args[0];
@@ -902,7 +921,7 @@ public ListenableFuture answer(InvocationOnMock invocation) {
     doReturn(defaultShim).when(groupAppContext).getHadoopShim();
     groupDagId = TezDAGID.getInstance(appAttemptId.getApplicationId(), 3);
-    groupDagPlan = createGroupDAGPlan();
+    groupDagPlan = createGroupDAGPlan(testName.getMethodName());
     groupDag = new DAGImpl(groupDagId, conf, groupDagPlan,
         dispatcher.getEventHandler(),
         taskCommunicatorManagerInterface, fsTokens, clock, "user", thh,
@@ -1171,14 +1190,14 @@ public void testEdgeManager_RouteDataMovementEventToDestination() {
     VertexImpl v2 = (VertexImpl)dagWithCustomEdge.getVertex("vertex2");
     dispatcher.await();
     Task t1= v2.getTask(0);
-    TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskId(), 0));
+    TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskID(), 0));
 
     DataMovementEvent daEvent = DataMovementEvent.create(ByteBuffer.wrap(new byte[0]));
     TezEvent tezEvent = new TezEvent(daEvent,
-        new EventMetaData(EventProducerConsumerType.INPUT, "vertex1", "vertex2", ta1.getID()));
+        new EventMetaData(EventProducerConsumerType.INPUT, "vertex1", "vertex2", ta1.getTaskAttemptID()));
     dispatcher.getEventHandler().handle(new VertexEventRouteEvent(v2.getVertexId(),
         Lists.newArrayList(tezEvent)));
     dispatcher.await();
-    v2.getTaskAttemptTezEvents(ta1.getID(), 0, 0, 1000);
+    v2.getTaskAttemptTezEvents(ta1.getTaskAttemptID(), 0, 0, 1000);
     dispatcher.await();
 
     Assert.assertEquals(VertexState.FAILED, v2.getState());
@@ -1204,11 +1223,11 @@ public void testEdgeManager_RouteDataMovementEventToDestinationWithLegacyRouting
     dispatcher.await();
 
     Task t1= v2.getTask(0);
-    TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskId(), 0));
+    TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskID(), 0));
 
     DataMovementEvent daEvent = DataMovementEvent.create(ByteBuffer.wrap(new byte[0]));
     TezEvent tezEvent = new TezEvent(daEvent,
-        new EventMetaData(EventProducerConsumerType.INPUT, "vertex1", "vertex2", ta1.getID()));
+        new EventMetaData(EventProducerConsumerType.INPUT, "vertex1", "vertex2", ta1.getTaskAttemptID()));
     dispatcher.getEventHandler().handle(
         new VertexEventRouteEvent(v2.getVertexId(), Lists.newArrayList(tezEvent)));
     dispatcher.await();
@@ -1236,13 +1255,13 @@ public void testEdgeManager_RouteInputSourceTaskFailedEventToDestinationLegacyRo
     dispatcher.await();
 
     Task t1= v2.getTask(0);
-    TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskId(), 0));
+    TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskID(), 0));
     InputFailedEvent ifEvent = InputFailedEvent.create(0, 1);
     TezEvent tezEvent = new TezEvent(ifEvent,
-        new EventMetaData(EventProducerConsumerType.INPUT,"vertex1", "vertex2", ta1.getID()));
+        new EventMetaData(EventProducerConsumerType.INPUT,"vertex1", "vertex2", ta1.getTaskAttemptID()));
     dispatcher.getEventHandler().handle(new VertexEventRouteEvent(v2.getVertexId(),
         Lists.newArrayList(tezEvent)));
     dispatcher.await();
-    v2.getTaskAttemptTezEvents(ta1.getID(), 0, 0, 1000);
+    v2.getTaskAttemptTezEvents(ta1.getTaskAttemptID(), 0, 0, 1000);
     dispatcher.await();
 
     Assert.assertEquals(VertexState.FAILED, v2.getState());
@@ -1267,11 +1286,11 @@ public void testEdgeManager_GetNumDestinationConsumerTasks() {
     dispatcher.await();
 
     Task t1= v2.getTask(0);
-    TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskId(), 0));
+    TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskID(), 0));
 
     InputReadErrorEvent ireEvent = InputReadErrorEvent.create("", 0, 0);
     TezEvent tezEvent = new TezEvent(ireEvent,
-        new EventMetaData(EventProducerConsumerType.INPUT,"vertex2", "vertex1", ta1.getID()));
+        new EventMetaData(EventProducerConsumerType.INPUT,"vertex2", "vertex1", ta1.getTaskAttemptID()));
     dispatcher.getEventHandler().handle(
         new VertexEventRouteEvent(v2.getVertexId(), Lists.newArrayList(tezEvent)));
     dispatcher.await();
@@ -1298,10 +1317,10 @@ public void testEdgeManager_RouteInputErrorEventToSource() {
     dispatcher.await();
 
     Task t1= v2.getTask(0);
-    TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskId(), 0));
+    TaskAttemptImpl ta1= (TaskAttemptImpl)t1.getAttempt(TezTaskAttemptID.getInstance(t1.getTaskID(), 0));
     InputReadErrorEvent ireEvent = InputReadErrorEvent.create("", 0, 0);
     TezEvent tezEvent = new TezEvent(ireEvent,
-        new EventMetaData(EventProducerConsumerType.INPUT,"vertex2", "vertex1", ta1.getID()));
+        new EventMetaData(EventProducerConsumerType.INPUT,"vertex2", "vertex1", ta1.getTaskAttemptID()));
     dispatcher.getEventHandler().handle(new VertexEventRouteEvent(v2.getVertexId(),
         Lists.newArrayList(tezEvent)));
     dispatcher.await();
     //
@@ -1759,7 +1778,7 @@ public void testGetDAGStatusWithWait() throws TezException {
     DAGStatusBuilder dagStatus = dag.getDAGStatus(EnumSet.noneOf(StatusGetOpts.class), 2000l);
     long dagStatusEndTime = System.currentTimeMillis();
     long diff = dagStatusEndTime - dagStatusStartTime;
-    Assert.assertTrue(diff > 1500 && diff < 2500);
+    Assert.assertTrue(diff >= 0 && diff < 2500);
     Assert.assertEquals(DAGStatusBuilder.State.RUNNING, dagStatus.getState());
   }
 
@@ -1802,6 +1821,9 @@ public void runTestGetDAGStatusReturnOnDagFinished(DAGStatusBuilder.State testSt
     dispatcher.await();
     Assert.assertEquals(DAGState.RUNNING, dag.getState());
     Assert.assertEquals(5, dag.getSuccessfulVertices());
+    // Verify that dagStatus is running state
+    Assert.assertEquals(DAGStatus.State.RUNNING, dag.getDAGStatus(EnumSet.noneOf(StatusGetOpts.class),
+        10000L).getState());
 
     ReentrantLock lock = new ReentrantLock();
     Condition startCondition = lock.newCondition();
@@ -1848,7 +1870,8 @@ public void runTestGetDAGStatusReturnOnDagFinished(DAGStatusBuilder.State testSt
     long diff = statusCheckRunnable.dagStatusEndTime - statusCheckRunnable.dagStatusStartTime;
     Assert.assertNotNull(statusCheckRunnable.dagStatus);
-    Assert.assertTrue(diff > 1000 && diff < 3500);
+    Assert.assertTrue("Status: " + statusCheckRunnable.dagStatus.getState()
+        + ", Diff:" + diff, diff >= 0 && diff < 3500);
     Assert.assertEquals(testState, statusCheckRunnable.dagStatus.getState());
     t1.join();
   }
@@ -1929,6 +1952,51 @@ private void _testDAGTerminate(DAGTerminationCause terminationCause) {
     Assert.assertEquals(1, dagFinishEventHandler.dagFinishEvents);
   }
 
+  @Test (timeout = 5000L)
+  @SuppressWarnings("unchecked")
+  public void testDAGHang() throws Exception {
+    conf.setBoolean(
+        TezConfiguration.TEZ_AM_COMMIT_ALL_OUTPUTS_ON_DAG_SUCCESS,
+        false);
+    dag = spy(new DAGImpl(dagId, conf, dagPlan,
+        dispatcher.getEventHandler(), taskCommunicatorManagerInterface,
+        fsTokens, clock, "user", thh, appContext));
+    StateMachineTez spyStateMachine =
+        spy(new StateMachineTez(
+            dag.stateMachineFactory.make(dag), dag));
+    when(dag.getStateMachine()).thenReturn(spyStateMachine);
+    dag.entityUpdateTracker = new StateChangeNotifierForTest(dag);
+    doReturn(dag).when(appContext).getCurrentDAG();
+    DAGImpl.OutputKey outputKey = mock(DAGImpl.OutputKey.class);
+    ListenableFuture future = mock(ListenableFuture.class);
+    dag.commitFutures.put(outputKey, future);
+    initDAG(dag);
+    startDAG(dag);
+    dispatcher.await();
+
+    dispatcher.getEventHandler().handle(new DAGEventVertexCompleted(
+        TezVertexID.getInstance(dagId, 0), VertexState.SUCCEEDED));
+    dispatcher.getEventHandler().handle(new DAGEventVertexCompleted(
+        TezVertexID.getInstance(dagId, 1), VertexState.SUCCEEDED));
+    dispatcher.getEventHandler().handle(new DAGEventVertexCompleted(
+        TezVertexID.getInstance(dagId, 2), VertexState.SUCCEEDED));
+    dispatcher.getEventHandler().handle(new DAGEventVertexCompleted(
+        TezVertexID.getInstance(dagId, 3), VertexState.SUCCEEDED));
+    dispatcher.getEventHandler().handle(new DAGEventVertexCompleted(
+        TezVertexID.getInstance(dagId, 4), VertexState.SUCCEEDED));
+    dispatcher.getEventHandler().handle(new DAGEventVertexCompleted(
+        TezVertexID.getInstance(dagId, 5), VertexState.SUCCEEDED));
+    dispatcher.await();
+    Assert.assertEquals(DAGState.COMMITTING, dag.getState());
+    DAGEventCommitCompleted dagEvent = new DAGEventCommitCompleted(
+        dagId, outputKey, false, new RuntimeException("test"));
+    doThrow(new RuntimeException("test")).when(
+        dag).logJobHistoryUnsuccesfulEvent(any(), any());
+    dag.handle(dagEvent);
+    dispatcher.await();
+    Assert.assertTrue("DAG did not terminate!", dag.getInternalState() == DAGState.FAILED);
+  }
+
   @Test(timeout = 5000)
   public void testDAGKillVertexSuccessAfterTerminated() {
     _testDAGKillVertexSuccessAfterTerminated(DAGTerminationCause.DAG_KILL);
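testDAGHang pins down a hang scenario: a DAG sitting in COMMITTING receives a failed DAGEventCommitCompleted while the history-logging call on that path is forced (via doThrow on the spy) to raise an exception. If that exception escaped before the state transition, the DAG would wait on its outstanding commit future forever; the test asserts it still reaches FAILED. A simplified stand-in for the invariant under test (illustrative only, not DAGImpl's actual transition code):

  // Simplified sketch of the COMMITTING -> FAILED transition the test enforces.
  final class CommitFailureSketch {
    enum State { COMMITTING, FAILED }
    private State state = State.COMMITTING;

    void onCommitCompleted(boolean success, RuntimeException cause) {
      if (success) {
        return; // the happy path is not what testDAGHang exercises
      }
      try {
        // testDAGHang forces the equivalent call to throw
        logUnsuccessfulEvent(cause);
      } catch (RuntimeException logFailure) {
        // must not abort the transition below,
        // otherwise the DAG hangs in COMMITTING
      }
      state = State.FAILED;
    }

    private void logUnsuccessfulEvent(RuntimeException cause) {
      throw new RuntimeException("history logging failed", cause);
    }

    State getState() {
      return state;
    }
  }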
@@ -2296,4 +2364,71 @@ public void testCounterLimits() {
   }
 
+  @Test(timeout = 5000)
+  public void testTotalContainersUsedCounter() {
+    DAGImpl spy = getDagSpy();
+
+    spy.addUsedContainer(Container.newInstance(ContainerId.fromString("container_e16_1504924099862_7571_01_000005"),
+        mock(NodeId.class), null, null, null, null));
+    spy.addUsedContainer(Container.newInstance(ContainerId.fromString("container_e16_1504924099862_7571_01_000006"),
+        mock(NodeId.class), null, null, null, null));
+
+    spy.onFinish();
+    // 2 calls to addUsedContainer
+    verify(spy, times(2)).addUsedContainer(any(Container.class));
+    // 2 containers were used
+    Assert.assertEquals(2,
+        spy.getAllCounters().getGroup(DAGCounter.class.getName()).findCounter(DAGCounter.TOTAL_CONTAINERS_USED.name())
+            .getValue());
+  }
+
+  @Test(timeout = 5000)
+  public void testNodesUsedCounter() {
+    DAGImpl spy = getDagSpy();
+
+    Container containerOnHost = mock(Container.class);
+    when(containerOnHost.getNodeId()).thenReturn(NodeId.fromString("localhost:0"));
+    Container containerOnSameHost = mock(Container.class);
+    when(containerOnSameHost.getNodeId()).thenReturn(NodeId.fromString("localhost:0"));
+    Container containerOnDifferentHost = mock(Container.class);
+    when(containerOnDifferentHost.getNodeId()).thenReturn(NodeId.fromString("otherhost:0"));
+    Container containerOnSameHostWithDifferentPort = mock(Container.class);
+    when(containerOnSameHostWithDifferentPort.getNodeId()).thenReturn(NodeId.fromString("localhost:1"));
+
+    spy.addUsedContainer(containerOnHost);
+    spy.addUsedContainer(containerOnSameHost);
+    spy.addUsedContainer(containerOnDifferentHost);
+    spy.addUsedContainer(containerOnSameHostWithDifferentPort);
+
+    when(taskSchedulerManager.getNumClusterNodes(anyBoolean())).thenReturn(10);
+
+    spy.onFinish();
+    // 4 calls to addUsedContainer
+    verify(spy, times(4)).addUsedContainer(any(Container.class));
+
+    // 2 distinct node hosts were seen: localhost, otherhost
+    Assert.assertEquals(2,
+        spy.getAllCounters().getGroup(DAGCounter.class.getName()).findCounter(DAGCounter.NODE_USED_COUNT.name())
+            .getValue());
+
+    Assert.assertTrue(spy.nodesUsedByCurrentDAG.contains("localhost"));
+    Assert.assertTrue(spy.nodesUsedByCurrentDAG.contains("otherhost"));
+
+    Assert.assertEquals(10,
+        spy.getAllCounters().getGroup(DAGCounter.class.getName())
+            .findCounter(DAGCounter.NODE_TOTAL_COUNT.name())
+            .getValue());
+  }
+
+  private DAGImpl getDagSpy() {
+    initDAG(mrrDag);
+    dispatcher.await();
+    startDAG(mrrDag);
+    dispatcher.await();
+
+    // needed when onFinish() method is called on a DAGImpl
+    when(mrrAppContext.getTaskScheduler()).thenReturn(taskSchedulerManager);
+
+    return spy(mrrDag);
+  }
 }
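The two counter tests above encode the intended semantics: TOTAL_CONTAINERS_USED counts every container handed to the DAG, while NODE_USED_COUNT deduplicates node IDs by host, so localhost:0 and localhost:1 count as a single node, and NODE_TOTAL_COUNT simply reports the scheduler's cluster-node count. A runnable sketch of the dedupe rule (assumed semantics, independent of DAGImpl internals):

  import java.util.Arrays;
  import java.util.HashSet;
  import java.util.List;
  import java.util.Set;

  public class NodeUsageSketch {
    public static void main(String[] args) {
      // NodeIds as "host:port", mirroring the mocks in testNodesUsedCounter
      List<String> nodeIds = Arrays.asList(
          "localhost:0", "localhost:0", "otherhost:0", "localhost:1");
      Set<String> hosts = new HashSet<>();
      for (String nodeId : nodeIds) {
        // only the host identifies a node; the port is ignored
        hosts.add(nodeId.substring(0, nodeId.indexOf(':')));
      }
      System.out.println("containers used = " + nodeIds.size());   // 4
      System.out.println("distinct nodes used = " + hosts.size()); // 2
    }
  }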
diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java
index 260bd42f2c..159dd9fb7f 100644
--- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java
+++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java
@@ -17,11 +17,6 @@
  */
 package org.apache.tez.dag.app.dag.impl;
 
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.doReturn;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
@@ -132,13 +127,17 @@
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
-import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static org.junit.Assert.*;
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
 import com.google.common.util.concurrent.ListenableFuture;
 import com.google.common.util.concurrent.ListeningExecutorService;
@@ -176,14 +175,23 @@ public class TestDAGRecovery {
   private TezVertexID v2Id;
   private TezTaskID t1v2Id;
   private TezTaskAttemptID ta1t1v2Id;
+  private TezVertexID v3Id;
+  private TezTaskID t1v3Id;
+  private TezTaskAttemptID ta1t1v3Id;
 
   ////////////////////////
   private Random rand = new Random();
   private long dagInitedTime = System.currentTimeMillis() + rand.nextInt(100);
   private long dagStartedTime = dagInitedTime + rand.nextInt(100);
   private long v1InitedTime = dagStartedTime + rand.nextInt(100);
+  private long v2InitedTime = dagStartedTime + rand.nextInt(100);
+  private long v3InitedTime = Math.max(v1InitedTime, v2InitedTime) + rand.nextInt(100);
   private long v1StartedTime = v1InitedTime + rand.nextInt(100);
+  private long v2StartedTime = v2InitedTime + rand.nextInt(100);
+  private long v3StartedTime = v3InitedTime + rand.nextInt(100);
   private int v1NumTask = 10;
+  private int v2NumTask = 5;
+  private int v3NumTask = 2;
   private long t1StartedTime = v1StartedTime + rand.nextInt(100);
   private long t1FinishedTime = t1StartedTime + rand.nextInt(100);
   private long ta1LaunchTime = t1StartedTime + rand.nextInt(100);
@@ -200,7 +208,7 @@ private class TaskEventDispatcher implements EventHandler {
     @SuppressWarnings("unchecked")
     @Override
     public void handle(TaskEvent event) {
-      TaskImpl task = (TaskImpl) dag.getVertex(event.getTaskID().getVertexID())
+      TaskImpl task = (TaskImpl) dag.getVertex(event.getVertexID())
           .getTask(event.getTaskID());
       task.handle(event);
     }
@@ -211,8 +219,7 @@ private class TaskAttemptEventDispatcher implements EventHandler {
     @Override
     public void handle(TaskAttemptEvent event) {
-      Vertex vertex = dag.getVertex(event.getTaskAttemptID().getTaskID()
-          .getVertexID());
+      Vertex vertex = dag.getVertex(event.getVertexID());
       Task task = vertex.getTask(event.getTaskAttemptID().getTaskID());
       TaskAttempt ta = task.getAttempt(event.getTaskAttemptID());
       ((EventHandler) ta).handle(event);
@@ -224,7 +231,7 @@ private class VertexEventDispatcher implements EventHandler {
     @SuppressWarnings("unchecked")
     @Override
     public void handle(VertexEvent event) {
-      VertexImpl vertex = (VertexImpl) dag.getVertex(event.getVertexId());
+      VertexImpl vertex = (VertexImpl) dag.getVertex(event.getVertexID());
       vertex.handle(event);
     }
   }
@@ -318,7 +325,7 @@ public void setup() {
     when(appContext.getApplicationID()).thenReturn(appAttemptId.getApplicationId());
     when(appContext.getClock()).thenReturn(new SystemClock());
 
-    Mockito.doAnswer(new Answer() {
+    doAnswer(new Answer() {
       public ListenableFuture answer(InvocationOnMock invocation) {
         Object[] args = invocation.getArguments();
         CallableEvent e = (CallableEvent) args[0];
@@ -354,6 +361,9 @@ public ListenableFuture answer(InvocationOnMock invocation) {
     v2Id = TezVertexID.getInstance(dagId, 1);
     t1v2Id = TezTaskID.getInstance(v2Id, 0);
     ta1t1v2Id = TezTaskAttemptID.getInstance(t1v2Id, 0);
+    v3Id = TezVertexID.getInstance(dagId, 2);
+    t1v3Id = TezTaskID.getInstance(v3Id, 0);
+    ta1t1v3Id = TezTaskAttemptID.getInstance(t1v3Id, 0);
 
     dispatcher.register(CallableEventType.class, new CallableEventDispatcher());
     taskEventDispatcher = new TaskEventDispatcher();
@@ -724,7 +734,7 @@ public void testVertexRecoverFromInited() {
    * DAG: DAGInitedEvent -> DAGStartedEvent
    * V1: VertexReconfigrationDoneEvent -> VertexInitializedEvent
    *
-   * V1 skip initialization.
+   * Reinitialize V1 again.
    */
   @Test//(timeout=5000)
   public void testVertexRecoverFromInitedAndReconfigureDone() {
@@ -734,7 +744,7 @@ public void testVertexRecoverFromInitedAndReconfigureDone() {
         "vertex1", 0L, v1InitedTime,
         v1NumTask, "", null, inputGeneratedTezEvents, null);
     VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id,
-        0L, v1NumTask, null, null, null, true);
+        0L, v1NumTask, null, null, null, false);
     VertexRecoveryData vertexRecoveryData = new VertexRecoveryData(v1InitedEvent,
         v1ReconfigureDoneEvent, null, null, new HashMap(), false);
     doReturn(vertexRecoveryData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
@@ -747,6 +757,76 @@ public void testVertexRecoverFromInitedAndReconfigureDone() {
     } catch (InterruptedException e) {
       e.printStackTrace();
     }
+    VertexImpl v1 = (VertexImpl)dag.getVertex("vertex1");
+    VertexImpl v2 = (VertexImpl)dag.getVertex("vertex2");
+    VertexImpl v3 = (VertexImpl)dag.getVertex("vertex3");
+    assertEquals(DAGState.RUNNING, dag.getState());
+    // reinitialize v1
+    assertEquals(VertexState.INITIALIZING, v1.getState());
+    assertEquals(VertexState.RUNNING, v2.getState());
+    assertEquals(VertexState.INITED, v3.getState());
+  }
+
+  /**
+   * RecoveryEvents:
+   * DAG: DAGInitedEvent -> DAGStartedEvent
+   * V1: VertexReconfigrationDoneEvent -> VertexInitializedEvent -> VertexStartedEvent
+   *
+   * Reinitialize V1 again.
+   */
+  @Test(timeout=5000)
+  public void testVertexRecoverFromStart() {
+    initMockDAGRecoveryDataForVertex();
+    List inputGeneratedTezEvents = new ArrayList();
+    VertexInitializedEvent v1InitedEvent = new VertexInitializedEvent(v1Id,
+        "vertex1", 0L, v1InitedTime,
+        v1NumTask, "", null, inputGeneratedTezEvents, null);
+    VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id,
+        0L, v1NumTask, null, null, null, false);
+    VertexStartedEvent v1StartedEvent = new VertexStartedEvent(v1Id, 0L, v1StartedTime);
+    VertexRecoveryData vertexRecoveryData = new VertexRecoveryData(v1InitedEvent,
+        v1ReconfigureDoneEvent, v1StartedEvent, null, new HashMap(), false);
+    doReturn(vertexRecoveryData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
+
+    DAGEventRecoverEvent recoveryEvent = new DAGEventRecoverEvent(dagId, dagRecoveryData);
+    dag.handle(recoveryEvent);
+    dispatcher.await();
+
+    VertexImpl v1 = (VertexImpl)dag.getVertex("vertex1");
+    VertexImpl v2 = (VertexImpl)dag.getVertex("vertex2");
+    VertexImpl v3 = (VertexImpl)dag.getVertex("vertex3");
+    assertEquals(DAGState.RUNNING, dag.getState());
+    // reinitialize v1
+    assertEquals(VertexState.INITIALIZING, v1.getState());
+    assertEquals(VertexState.RUNNING, v2.getState());
+    assertEquals(VertexState.INITED, v3.getState());
+  }
+
+  /**
+   * RecoveryEvents:
+   * DAG: DAGInitedEvent -> DAGStartedEvent
+   * V1: VertexReconfigrationDoneEvent -> VertexInitializedEvent -> VertexStartedEvent -> setParallelismCalledFlag
+   *
+   * V1 skip initialization.
+   */
+  @Test(timeout=5000)
+  public void testVertexRecoverWithSetParallelismCalledFlag() {
+    initMockDAGRecoveryDataForVertex();
+    List inputGeneratedTezEvents = new ArrayList();
+    VertexInitializedEvent v1InitedEvent = new VertexInitializedEvent(v1Id,
+        "vertex1", 0L, v1InitedTime,
+        v1NumTask, "", null, inputGeneratedTezEvents, null);
+    VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id,
+        0L, v1NumTask, null, null, null, true);
+    VertexStartedEvent v1StartedEvent = new VertexStartedEvent(v1Id, 0L, v1StartedTime);
+    VertexRecoveryData vertexRecoveryData = new VertexRecoveryData(v1InitedEvent,
+        v1ReconfigureDoneEvent, v1StartedEvent, null, new HashMap(), false);
+    doReturn(vertexRecoveryData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
+
+    DAGEventRecoverEvent recoveryEvent = new DAGEventRecoverEvent(dagId, dagRecoveryData);
+    dag.handle(recoveryEvent);
+    dispatcher.await();
+
     VertexImpl v1 = (VertexImpl)dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl)dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl)dag.getVertex("vertex3");
@@ -754,32 +834,122 @@ public void testVertexRecoverFromInitedAndReconfigureDone() {
     // v1 skip initialization
     assertEquals(VertexState.RUNNING, v1.getState());
     assertEquals(v1InitedTime, v1.initedTime);
+    assertEquals(v1StartedTime, v1.startedTime);
     assertEquals(v1NumTask, v1.getTotalTasks());
     assertEquals(VertexState.RUNNING, v2.getState());
     assertEquals(VertexState.RUNNING, v3.getState());
   }
-
+
   /**
    * RecoveryEvents:
-   * DAG: DAGInitedEvent -> DAGStartedEvent
-   * V1: VertexReconfigrationDoneEvent -> VertexInitializedEvent -> VertexStartedEvent
-   *
-   * V1 skip initialization.
+   * DAG: DAGInitedEvent -> DAGStartedEvent
+   * V1: VertexReconfigrationDoneEvent -> VertexInitializedEvent -> VertexStartedEvent -> VertexTaskStart
+   *
+   * V1 skip initialization.
    */
   @Test(timeout=5000)
-  public void testVertexRecoverFromStart() {
-    initMockDAGRecoveryDataForVertex();
+  public void testVertexRecoverFromVertexTaskStart() {
+    initMockDAGRecoveryDataForVertex();
     List inputGeneratedTezEvents = new ArrayList();
-    VertexInitializedEvent v1InitedEvent = new VertexInitializedEvent(v1Id,
-        "vertex1", 0L, v1InitedTime,
+    VertexInitializedEvent v1InitedEvent = new VertexInitializedEvent(v1Id,
+        "vertex1", 0L, v1InitedTime,
         v1NumTask, "", null, inputGeneratedTezEvents, null);
-    VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id,
+    VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id,
         0L, v1NumTask, null, null, null, true);
     VertexStartedEvent v1StartedEvent = new VertexStartedEvent(v1Id, 0L, v1StartedTime);
+
+    TaskStartedEvent taskStartedEvent = new TaskStartedEvent(t1v1Id, "v1", 0L, 0L);
+    TaskRecoveryData taskRecoveryData = new TaskRecoveryData(taskStartedEvent, null, null);
+    Map taskRecoveryDataMap = new HashMap<>();
+    // put dummy tasks
+    taskRecoveryDataMap.put(t1v2Id, taskRecoveryData);
+
     VertexRecoveryData vertexRecoveryData = new VertexRecoveryData(v1InitedEvent,
-        v1ReconfigureDoneEvent, v1StartedEvent, null, new HashMap(), false);
+        v1ReconfigureDoneEvent, v1StartedEvent, null, taskRecoveryDataMap, false);
     doReturn(vertexRecoveryData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
-
+
+    DAGEventRecoverEvent recoveryEvent = new DAGEventRecoverEvent(dagId, dagRecoveryData);
+    dag.handle(recoveryEvent);
+    dispatcher.await();
+
+    VertexImpl v1 = (VertexImpl)dag.getVertex("vertex1");
+    VertexImpl v2 = (VertexImpl)dag.getVertex("vertex2");
+    VertexImpl v3 = (VertexImpl)dag.getVertex("vertex3");
+    assertEquals(DAGState.RUNNING, dag.getState());
+    // v1 skip initialization
+    assertEquals(VertexState.RUNNING, v1.getState());
+    assertEquals(v1InitedTime, v1.initedTime);
+    assertEquals(v1StartedTime, v1.startedTime);
+    assertEquals(v1NumTask, v1.getTotalTasks());
+    assertEquals(VertexState.RUNNING, v2.getState());
+    assertEquals(VertexState.RUNNING, v3.getState());
+  }
+
+  /**
+   * RecoveryEvents:
+   * DAG: DAGInitedEvent -> DAGStartedEvent
+   * V1: VertexReconfigrationDoneEvent -> VertexInitializedEvent -> VertexStartedEvent -> VertexTaskStart
+   * V2: VertexReconfigrationDoneEvent -> VertexInitializedEvent -> VertexStartedEvent -> VertexTaskStart
+   * V3: VertexReconfigrationDoneEvent -> VertexInitializedEvent -> VertexStartedEvent -> VertexTaskStart
+   *
+   * V1 skip initialization.
+   * V2 skip initialization.
+   * V3 skip initialization.
+   */
+  @Test(timeout=5000)
+  public void testMultipleVertexRecoverFromVertexTaskStart() {
+    initMockDAGRecoveryDataForVertex();
+    List inputGeneratedTezEvents = new ArrayList();
+    VertexInitializedEvent v1InitedEvent = new VertexInitializedEvent(v1Id,
+        "vertex1", 0L, v1InitedTime,
+        v1NumTask, "", null, inputGeneratedTezEvents, null);
+    VertexInitializedEvent v2InitedEvent = new VertexInitializedEvent(v2Id,
+        "vertex2", 0L, v2InitedTime,
+        v2NumTask, "", null, inputGeneratedTezEvents, null);
+    VertexInitializedEvent v3InitedEvent = new VertexInitializedEvent(v3Id,
+        "vertex3", 0L, v3InitedTime,
+        v3NumTask, "", null, inputGeneratedTezEvents, null);
+
+    VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id,
+        0L, v1NumTask, null, null, null, true);
+    VertexConfigurationDoneEvent v2ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v2Id,
+        0L, v2NumTask, null, null, null, true);
+    VertexConfigurationDoneEvent v3ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v3Id,
+        0L, v3NumTask, null, null, null, true);
+
+    VertexStartedEvent v1StartedEvent = new VertexStartedEvent(v1Id, 0L, v1StartedTime);
+    VertexStartedEvent v2StartedEvent = new VertexStartedEvent(v2Id, 0L, v2StartedTime);
+    VertexStartedEvent v3StartedEvent = new VertexStartedEvent(v3Id, 0L, v3StartedTime);
+
+    TaskStartedEvent v1taskStartedEvent = new TaskStartedEvent(t1v1Id, "vertex1", 0L, 0L);
+    TaskRecoveryData v1taskRecoveryData = new TaskRecoveryData(v1taskStartedEvent, null, null);
+    Map v1taskRecoveryDataMap = new HashMap<>();
+    // put dummy tasks
+    v1taskRecoveryDataMap.put(t1v1Id, v1taskRecoveryData);
+
+    TaskStartedEvent v2taskStartedEvent = new TaskStartedEvent(t1v2Id, "vertex2", 0L, 0L);
+    TaskRecoveryData v2taskRecoveryData = new TaskRecoveryData(v2taskStartedEvent, null, null);
+    Map v2taskRecoveryDataMap = new HashMap<>();
+    // put dummy tasks
+    v2taskRecoveryDataMap.put(t1v2Id, v2taskRecoveryData);
+
+    TaskStartedEvent v3taskStartedEvent = new TaskStartedEvent(t1v3Id, "vertex3", 0L, 0L);
+    TaskRecoveryData v3taskRecoveryData = new TaskRecoveryData(v3taskStartedEvent, null, null);
+    Map v3taskRecoveryDataMap = new HashMap<>();
+    // put dummy tasks
+    v3taskRecoveryDataMap.put(t1v3Id, v3taskRecoveryData);
+
+    VertexRecoveryData vertex1RecoveryData = new VertexRecoveryData(v1InitedEvent,
+        v1ReconfigureDoneEvent, v1StartedEvent, null, v1taskRecoveryDataMap, false);
+    VertexRecoveryData vertex2RecoveryData = new VertexRecoveryData(v2InitedEvent,
+        v2ReconfigureDoneEvent, v2StartedEvent, null, v2taskRecoveryDataMap, false);
+    VertexRecoveryData vertex3RecoveryData = new VertexRecoveryData(v3InitedEvent,
+        v3ReconfigureDoneEvent, v3StartedEvent, null, v3taskRecoveryDataMap, false);
+
+    doReturn(vertex1RecoveryData).when(dagRecoveryData).getVertexRecoveryData(v1Id);
+    doReturn(vertex2RecoveryData).when(dagRecoveryData).getVertexRecoveryData(v2Id);
+    doReturn(vertex3RecoveryData).when(dagRecoveryData).getVertexRecoveryData(v3Id);
+
     DAGEventRecoverEvent recoveryEvent = new DAGEventRecoverEvent(dagId, dagRecoveryData);
     dag.handle(recoveryEvent);
     dispatcher.await();
@@ -787,14 +957,104 @@ public void testVertexRecoverFromStart() {
     VertexImpl v1 = (VertexImpl)dag.getVertex("vertex1");
     VertexImpl v2 = (VertexImpl)dag.getVertex("vertex2");
     VertexImpl v3 = (VertexImpl)dag.getVertex("vertex3");
+    assertEquals(DAGState.RUNNING, dag.getState());
+
     // v1 skip initialization
     assertEquals(VertexState.RUNNING, v1.getState());
     assertEquals(v1InitedTime, v1.initedTime);
assertEquals(v1StartedTime, v1.startedTime); assertEquals(v1NumTask, v1.getTotalTasks()); + + // v2 skips initialization assertEquals(VertexState.RUNNING, v2.getState()); + assertEquals(v2InitedTime, v2.initedTime); + assertEquals(v2StartedTime, v2.startedTime); + assertEquals(v2NumTask, v2.getTotalTasks()); + + // v3 skips initialization assertEquals(VertexState.RUNNING, v3.getState()); + assertEquals(v3InitedTime, v3.initedTime); + assertEquals(v3StartedTime, v3.startedTime); + assertEquals(v3NumTask, v3.getTotalTasks()); + } + + /** + * RecoveryEvents: + * DAG: DAGInitedEvent -> DAGStartedEvent + * V1: VertexConfigurationDoneEvent -> VertexInitializedEvent + * V2: VertexConfigurationDoneEvent -> VertexInitializedEvent -> VertexStartedEvent -> VertexTaskStart + * V3: VertexConfigurationDoneEvent -> VertexInitializedEvent -> VertexStartedEvent -> VertexTaskStart + * + * Reinitialize V1. + * V2 skips initialization. + * Reinitialize V3 as well, since V3 depends on V1. + */ + @Test(timeout=5000) + public void testMultipleVertexRecoverFromVertex() { + initMockDAGRecoveryDataForVertex(); + List<TezEvent> inputGeneratedTezEvents = new ArrayList<TezEvent>(); + VertexInitializedEvent v1InitedEvent = new VertexInitializedEvent(v1Id, + "vertex1", 0L, v1InitedTime, + v1NumTask, "", null, inputGeneratedTezEvents, null); + VertexInitializedEvent v2InitedEvent = new VertexInitializedEvent(v2Id, + "vertex2", 0L, v2InitedTime, + v2NumTask, "", null, inputGeneratedTezEvents, null); + VertexInitializedEvent v3InitedEvent = new VertexInitializedEvent(v3Id, + "vertex3", 0L, v3InitedTime, + v3NumTask, "", null, inputGeneratedTezEvents, null); + + VertexConfigurationDoneEvent v2ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v2Id, + 0L, v2NumTask, null, null, null, true); + VertexConfigurationDoneEvent v3ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v3Id, + 0L, v3NumTask, null, null, null, true); + + VertexStartedEvent v2StartedEvent = new VertexStartedEvent(v2Id, 0L, v2StartedTime); + VertexStartedEvent v3StartedEvent = new VertexStartedEvent(v3Id, 0L, v3StartedTime); + + TaskStartedEvent v2taskStartedEvent = new TaskStartedEvent(t1v2Id, "vertex2", 0L, 0L); + TaskRecoveryData v2taskRecoveryData = new TaskRecoveryData(v2taskStartedEvent, null, null); + Map<TezTaskID, TaskRecoveryData> v2taskRecoveryDataMap = new HashMap<>(); + // put dummy tasks + v2taskRecoveryDataMap.put(t1v2Id, v2taskRecoveryData); + + TaskStartedEvent v3taskStartedEvent = new TaskStartedEvent(t1v3Id, "vertex3", 0L, 0L); + TaskRecoveryData v3taskRecoveryData = new TaskRecoveryData(v3taskStartedEvent, null, null); + Map<TezTaskID, TaskRecoveryData> v3taskRecoveryDataMap = new HashMap<>(); + // put dummy tasks + v3taskRecoveryDataMap.put(t1v3Id, v3taskRecoveryData); + + VertexRecoveryData vertex1RecoveryData = new VertexRecoveryData(v1InitedEvent, + null, null, null, null, false); + VertexRecoveryData vertex2RecoveryData = new VertexRecoveryData(v2InitedEvent, + v2ReconfigureDoneEvent, v2StartedEvent, null, v2taskRecoveryDataMap, false); + VertexRecoveryData vertex3RecoveryData = new VertexRecoveryData(v3InitedEvent, + v3ReconfigureDoneEvent, v3StartedEvent, null, v3taskRecoveryDataMap, false); + + doReturn(vertex1RecoveryData).when(dagRecoveryData).getVertexRecoveryData(v1Id); + doReturn(vertex2RecoveryData).when(dagRecoveryData).getVertexRecoveryData(v2Id); + doReturn(vertex3RecoveryData).when(dagRecoveryData).getVertexRecoveryData(v3Id); + + DAGEventRecoverEvent recoveryEvent = new DAGEventRecoverEvent(dagId, dagRecoveryData); + dag.handle(recoveryEvent); + dispatcher.await(); + +
VertexImpl v1 = (VertexImpl)dag.getVertex("vertex1"); + VertexImpl v2 = (VertexImpl)dag.getVertex("vertex2"); + VertexImpl v3 = (VertexImpl)dag.getVertex("vertex3"); + assertEquals(DAGState.RUNNING, dag.getState()); + + // reinitialize v1 + assertEquals(VertexState.INITIALIZING, v1.getState()); + + // v2 skips initialization + assertEquals(VertexState.RUNNING, v2.getState()); + assertEquals(v2InitedTime, v2.initedTime); + assertEquals(v2StartedTime, v2.startedTime); + assertEquals(v2NumTask, v2.getTotalTasks()); + + // reinitialize v3 + assertEquals(VertexState.INITED, v3.getState()); } /////////////////////////////// Task //////////////////////////////////////////////////////////// @@ -808,8 +1068,13 @@ private void initMockDAGRecoveryDataForTask() { VertexConfigurationDoneEvent v1ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v1Id, 0L, v1NumTask, null, null, rootInputSpecs, true); VertexStartedEvent v1StartedEvent = new VertexStartedEvent(v1Id, 0L, v1StartedTime); + TaskStartedEvent v1taskStartedEvent = new TaskStartedEvent(t1v1Id, "vertex1", 0L, 0L); + TaskRecoveryData v1taskRecoveryData = new TaskRecoveryData(v1taskStartedEvent, null, null); + Map<TezTaskID, TaskRecoveryData> v1taskRecoveryDataMap = new HashMap<>(); + // put dummy tasks + v1taskRecoveryDataMap.put(t1v1Id, v1taskRecoveryData); VertexRecoveryData v1RecoveryData = new VertexRecoveryData(v1InitedEvent, - v1ReconfigureDoneEvent, v1StartedEvent, null, new HashMap<TezTaskID, TaskRecoveryData>(), false); + v1ReconfigureDoneEvent, v1StartedEvent, null, v1taskRecoveryDataMap, false); DAGInitializedEvent dagInitedEvent = new DAGInitializedEvent(dagId, dagInitedTime, "user", "dagName", null); diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGScheduler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGScheduler.java index f38f6890fc..e2df050799 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGScheduler.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGScheduler.java @@ -39,7 +39,7 @@ public class TestDAGScheduler { - class MockEventHandler implements EventHandler<TaskAttemptEventSchedule> { + static class MockEventHandler implements EventHandler<TaskAttemptEventSchedule> { TaskAttemptEventSchedule event; List<TaskAttemptEventSchedule> events = Lists.newLinkedList(); @Override @@ -58,14 +58,19 @@ public void testDAGSchedulerNaturalOrder() { TaskAttempt mockAttempt = mock(TaskAttempt.class); when(mockDag.getVertex((TezVertexID) any())).thenReturn(mockVertex); when(mockDag.getTotalVertices()).thenReturn(4); - when(mockVertex.getDistanceFromRoot()).thenReturn(0).thenReturn(1) - .thenReturn(2); + when(mockVertex.getDistanceFromRoot()) + .thenReturn(0).thenReturn(0) + .thenReturn(1).thenReturn(1) + .thenReturn(2).thenReturn(2); TezVertexID vId0 = TezVertexID.fromString("vertex_1436907267600_195589_1_00"); TezVertexID vId1 = TezVertexID.fromString("vertex_1436907267600_195589_1_01"); TezVertexID vId2 = TezVertexID.fromString("vertex_1436907267600_195589_1_02"); TezVertexID vId3 = TezVertexID.fromString("vertex_1436907267600_195589_1_03"); - when(mockVertex.getVertexId()).thenReturn(vId0).thenReturn(vId1) - .thenReturn(vId2).thenReturn(vId3); + when(mockVertex.getVertexId()) + .thenReturn(vId0).thenReturn(vId0) + .thenReturn(vId1).thenReturn(vId1) + .thenReturn(vId2).thenReturn(vId2) + .thenReturn(vId3).thenReturn(vId3); DAGEventSchedulerUpdate event = new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt); @@ -109,17 +114,17 @@ public void testConcurrencyLimit() { // schedule beyond limit and it gets scheduled mockAttempt =
mock(TaskAttempt.class); - when(mockAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId0, 0)); + when(mockAttempt.getTaskAttemptID()).thenReturn(TezTaskAttemptID.getInstance(tId0, 0)); scheduler.scheduleTask(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt)); Assert.assertEquals(1, mockEventHandler.events.size()); mockAttempt = mock(TaskAttempt.class); - when(mockAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId0, 1)); + when(mockAttempt.getTaskAttemptID()).thenReturn(TezTaskAttemptID.getInstance(tId0, 1)); scheduler.scheduleTask(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt)); Assert.assertEquals(2, mockEventHandler.events.size()); mockAttempt = mock(TaskAttempt.class); - when(mockAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId0, 2)); + when(mockAttempt.getTaskAttemptID()).thenReturn(TezTaskAttemptID.getInstance(tId0, 2)); scheduler.scheduleTask(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt)); Assert.assertEquals(3, mockEventHandler.events.size()); @@ -134,34 +139,34 @@ public void testConcurrencyLimit() { // schedule beyond limit and it gets buffered mockAttempt = mock(TaskAttempt.class); mockAttempts.add(mockAttempt); - when(mockAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++)); + when(mockAttempt.getTaskAttemptID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++)); scheduler.scheduleTask(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt)); Assert.assertEquals(scheduled+1, mockEventHandler.events.size()); // scheduled - Assert.assertEquals(mockAttempts.get(scheduled).getID(), + Assert.assertEquals(mockAttempts.get(scheduled).getTaskAttemptID(), mockEventHandler.events.get(scheduled).getTaskAttemptID()); // matches order scheduled++; mockAttempt = mock(TaskAttempt.class); mockAttempts.add(mockAttempt); - when(mockAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++)); + when(mockAttempt.getTaskAttemptID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++)); scheduler.scheduleTask(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt)); Assert.assertEquals(scheduled+1, mockEventHandler.events.size()); // scheduled - Assert.assertEquals(mockAttempts.get(scheduled).getID(), + Assert.assertEquals(mockAttempts.get(scheduled).getTaskAttemptID(), mockEventHandler.events.get(scheduled).getTaskAttemptID()); // matches order scheduled++; mockAttempt = mock(TaskAttempt.class); mockAttempts.add(mockAttempt); - when(mockAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++)); + when(mockAttempt.getTaskAttemptID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++)); scheduler.scheduleTask(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt)); Assert.assertEquals(scheduled, mockEventHandler.events.size()); // buffered mockAttempt = mock(TaskAttempt.class); mockAttempts.add(mockAttempt); - when(mockAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++)); + when(mockAttempt.getTaskAttemptID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++)); scheduler.scheduleTask(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt)); Assert.assertEquals(scheduled, mockEventHandler.events.size()); // buffered @@ -169,14 +174,14 @@ public void testConcurrencyLimit() { 
scheduler.taskCompleted(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_COMPLETED, mockAttempts.get(completed++))); Assert.assertEquals(scheduled+1, mockEventHandler.events.size()); // scheduled - Assert.assertEquals(mockAttempts.get(scheduled).getID(), + Assert.assertEquals(mockAttempts.get(scheduled).getTaskAttemptID(), mockEventHandler.events.get(scheduled).getTaskAttemptID()); // matches order scheduled++; scheduler.taskCompleted(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_COMPLETED, mockAttempts.get(completed++))); Assert.assertEquals(scheduled+1, mockEventHandler.events.size()); // scheduled - Assert.assertEquals(mockAttempts.get(scheduled).getID(), + Assert.assertEquals(mockAttempts.get(scheduled).getTaskAttemptID(), mockEventHandler.events.get(scheduled).getTaskAttemptID()); // matches order scheduled++; @@ -186,14 +191,12 @@ public void testConcurrencyLimit() { mockAttempt = mock(TaskAttempt.class); mockAttempts.add(mockAttempt); - when(mockAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++)); + when(mockAttempt.getTaskAttemptID()).thenReturn(TezTaskAttemptID.getInstance(tId1, requested++)); scheduler.scheduleTask(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt)); Assert.assertEquals(scheduled+1, mockEventHandler.events.size()); // scheduled - Assert.assertEquals(mockAttempts.get(scheduled).getID(), + Assert.assertEquals(mockAttempts.get(scheduled).getTaskAttemptID(), mockEventHandler.events.get(scheduled).getTaskAttemptID()); // matches order - scheduled++; - } @Test(timeout=5000) @@ -215,7 +218,6 @@ public void testConcurrencyLimitWithKilledNonRunningTask() { mockEventHandler); List<TaskAttempt> mockAttempts = Lists.newArrayList(); - int completed = 0; int requested = 0; int scheduled = 0; scheduler.addVertexConcurrencyLimit(vId0, 1); // effective @@ -223,24 +225,24 @@ public void testConcurrencyLimitWithKilledNonRunningTask() { // schedule beyond limit and it gets buffered mockAttempt = mock(TaskAttempt.class); mockAttempts.add(mockAttempt); - when(mockAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId0, requested++)); + when(mockAttempt.getTaskAttemptID()).thenReturn(TezTaskAttemptID.getInstance(tId0, requested++)); scheduler.scheduleTask(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt)); Assert.assertEquals(scheduled+1, mockEventHandler.events.size()); // scheduled - Assert.assertEquals(mockAttempts.get(scheduled).getID(), + Assert.assertEquals(mockAttempts.get(scheduled).getTaskAttemptID(), mockEventHandler.events.get(scheduled).getTaskAttemptID()); // matches order scheduled++; mockAttempt = mock(TaskAttempt.class); mockAttempts.add(mockAttempt); - when(mockAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId0, requested++)); + when(mockAttempt.getTaskAttemptID()).thenReturn(TezTaskAttemptID.getInstance(tId0, requested++)); scheduler.scheduleTask(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt)); Assert.assertEquals(scheduled, mockEventHandler.events.size()); // buffered mockAttempt = mock(TaskAttempt.class); mockAttempts.add(mockAttempt); - when(mockAttempt.getID()).thenReturn(TezTaskAttemptID.getInstance(tId0, requested++)); + when(mockAttempt.getTaskAttemptID()).thenReturn(TezTaskAttemptID.getInstance(tId0, requested++)); scheduler.scheduleTask(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_SCHEDULE, mockAttempt)); Assert.assertEquals(scheduled,
mockEventHandler.events.size()); // buffered @@ -248,7 +250,7 @@ public void testConcurrencyLimitWithKilledNonRunningTask() { scheduler.taskCompleted(new DAGEventSchedulerUpdate( DAGEventSchedulerUpdate.UpdateType.TA_COMPLETED, mockAttempts.get(1))); Assert.assertEquals(scheduled, mockEventHandler.events.size()); // buffered - Assert.assertEquals(mockAttempts.get(0).getID(), + Assert.assertEquals(mockAttempts.get(0).getTaskAttemptID(), mockEventHandler.events.get(0).getTaskAttemptID()); // matches order } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGSchedulerNaturalOrderControlled.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGSchedulerNaturalOrderControlled.java index 63137c716a..f79f359a7b 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGSchedulerNaturalOrderControlled.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGSchedulerNaturalOrderControlled.java @@ -19,7 +19,7 @@ package org.apache.tez.dag.app.dag.impl; import static org.junit.Assert.assertEquals; -import static org.mockito.Matchers.any; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; @@ -65,35 +65,35 @@ public void testSimpleFlow() { for (int i = 0; i < vertices[0].getTotalTasks(); i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[0].getVertexId(), i, 0)); } - verify(eventHandler, times(vertices[0].getTotalTasks())).handle(any(Event.class)); + verify(eventHandler, times(vertices[0].getTotalTasks())).handle(any()); reset(eventHandler); // Schedule 3 tasks belonging to v2 for (int i = 0; i < 3; i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[2].getVertexId(), i, 0)); } - verify(eventHandler, times(3)).handle(any(Event.class)); + verify(eventHandler, times(3)).handle(any()); reset(eventHandler); // Schedule 3 tasks belonging to v3 for (int i = 0; i < 3; i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[3].getVertexId(), i, 0)); } - verify(eventHandler, times(3)).handle(any(Event.class)); + verify(eventHandler, times(3)).handle(any()); reset(eventHandler); // Schedule remaining tasks belonging to v2 for (int i = 3; i < vertices[2].getTotalTasks(); i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[2].getVertexId(), i, 0)); } - verify(eventHandler, times(vertices[2].getTotalTasks() - 3)).handle(any(Event.class)); + verify(eventHandler, times(vertices[2].getTotalTasks() - 3)).handle(any()); reset(eventHandler); // Schedule remaining tasks belonging to v3 for (int i = 3; i < vertices[3].getTotalTasks(); i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[3].getVertexId(), i, 0)); } - verify(eventHandler, times(vertices[3].getTotalTasks() - 3)).handle(any(Event.class)); + verify(eventHandler, times(vertices[3].getTotalTasks() - 3)).handle(any()); reset(eventHandler); @@ -101,7 +101,7 @@ public void testSimpleFlow() { for (int i = 0; i < vertices[4].getTotalTasks(); i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[4].getVertexId(), i, 0)); } - verify(eventHandler, times(vertices[4].getTotalTasks())).handle(any(Event.class)); + verify(eventHandler, times(vertices[4].getTotalTasks())).handle(any()); reset(eventHandler); } @@ -124,7 +124,7 @@ public void testSourceRequestDelayed() { for (int i = 0; i < vertices[0].getTotalTasks(); i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[0].getVertexId(), i, 0)); } - 
verify(eventHandler, times(vertices[0].getTotalTasks())).handle(any(Event.class)); + verify(eventHandler, times(vertices[0].getTotalTasks())).handle(any()); reset(eventHandler); // v2 behaving as if configured with slow-start. @@ -132,14 +132,14 @@ public void testSourceRequestDelayed() { for (int i = 0; i < vertices[3].getTotalTasks(); i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[3].getVertexId(), i, 0)); } - verify(eventHandler, times(vertices[3].getTotalTasks())).handle(any(Event.class)); + verify(eventHandler, times(vertices[3].getTotalTasks())).handle(any()); reset(eventHandler); // Scheduling all tasks belonging to v4. None should get scheduled. for (int i = 0; i < vertices[4].getTotalTasks(); i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[4].getVertexId(), i, 0)); } - verify(eventHandler, never()).handle(any(Event.class)); + verify(eventHandler, never()).handle(any()); reset(eventHandler); // v2 now starts scheduling ... @@ -147,7 +147,7 @@ public void testSourceRequestDelayed() { for (int i = 0; i < 3; i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[2].getVertexId(), i, 0)); } - verify(eventHandler, times(3)).handle(any(Event.class)); + verify(eventHandler, times(3)).handle(any()); reset(eventHandler); // Schedule remaining tasks belonging to v2 @@ -164,9 +164,9 @@ public void testSourceRequestDelayed() { for (Event raw : args.getAllValues()) { TaskAttemptEventSchedule event = (TaskAttemptEventSchedule) raw; if (count < vertices[2].getTotalTasks() - 3) { - assertEquals(2, event.getTaskAttemptID().getTaskID().getVertexID().getId()); + assertEquals(2, event.getVertexID().getId()); } else { - assertEquals(4, event.getTaskAttemptID().getTaskID().getVertexID().getId()); + assertEquals(4, event.getVertexID().getId()); } count++; } @@ -192,7 +192,7 @@ public void testParallelismUpdated() { for (int i = 0; i < vertices[0].getTotalTasks(); i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[0].getVertexId(), i, 0)); } - verify(eventHandler, times(vertices[0].getTotalTasks())).handle(any(Event.class)); + verify(eventHandler, times(vertices[0].getTotalTasks())).handle(any()); reset(eventHandler); assertEquals(10, vertices[2].getTotalTasks()); @@ -202,14 +202,14 @@ public void testParallelismUpdated() { for (int i = 0; i < vertices[3].getTotalTasks(); i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[3].getVertexId(), i, 0)); } - verify(eventHandler, times(vertices[3].getTotalTasks())).handle(any(Event.class)); + verify(eventHandler, times(vertices[3].getTotalTasks())).handle(any()); reset(eventHandler); // Schedule all tasks belonging to v4 for (int i = 0; i < vertices[4].getTotalTasks(); i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[4].getVertexId(), i, 0)); } - verify(eventHandler, never()).handle(any(Event.class)); + verify(eventHandler, never()).handle(any()); reset(eventHandler); // Reset the parallelism for v2. 
@@ -221,7 +221,7 @@ public void testParallelismUpdated() { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[2].getVertexId(), i, 0)); } verify(eventHandler, times(vertices[2].getTotalTasks() + vertices[4].getTotalTasks())) - .handle(any(Event.class)); + .handle(any()); reset(eventHandler); } @@ -243,7 +243,7 @@ public void testMultipleRequestsForSameTask() { for (int i = 0; i < vertices[0].getTotalTasks() - 1; i++) { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[0].getVertexId(), i, 0)); } - verify(eventHandler, times(vertices[0].getTotalTasks() - 1)).handle(any(Event.class)); + verify(eventHandler, times(vertices[0].getTotalTasks() - 1)).handle(any()); reset(eventHandler); @@ -252,7 +252,7 @@ public void testMultipleRequestsForSameTask() { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[2].getVertexId(), i, 0)); } // Nothing should be scheduled - verify(eventHandler, never()).handle(any(Event.class)); + verify(eventHandler, never()).handle(any()); reset(eventHandler); // Schedule an extra attempt for all but 1 task belonging to v0 @@ -260,14 +260,14 @@ public void testMultipleRequestsForSameTask() { dagScheduler.scheduleTaskEx(createScheduleRequest(vertices[0].getVertexId(), i, 1)); } // Only v0 requests should have gone out - verify(eventHandler, times(vertices[0].getTotalTasks() - 1)).handle(any(Event.class)); + verify(eventHandler, times(vertices[0].getTotalTasks() - 1)).handle(any()); reset(eventHandler); // Schedule last task of v0, with attempt 1 dagScheduler.scheduleTaskEx( createScheduleRequest(vertices[0].getVertexId(), vertices[0].getTotalTasks() - 1, 1)); // One v0 request and all of v2 should have gone out - verify(eventHandler, times(1 + vertices[2].getTotalTasks())).handle(any(Event.class)); + verify(eventHandler, times(1 + vertices[2].getTotalTasks())).handle(any()); } @@ -364,7 +364,7 @@ private TaskAttempt createTaskAttempt(TezVertexID vertexId, int taskIdInt, int a TaskAttempt taskAttempt = mock(TaskAttempt.class); TezTaskID taskId = TezTaskID.getInstance(vertexId, taskIdInt); TezTaskAttemptID taskAttemptId = TezTaskAttemptID.getInstance(taskId, attemptIdInt); - doReturn(taskAttemptId).when(taskAttempt).getID(); + doReturn(taskAttemptId).when(taskAttempt).getTaskAttemptID(); doReturn(vertexId).when(taskAttempt).getVertexID(); return taskAttempt; } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestEdge.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestEdge.java index 1143395204..d81721303c 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestEdge.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestEdge.java @@ -21,7 +21,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; @@ -155,7 +155,7 @@ public void testScatterGatherManager() { } catch (IllegalArgumentException e) { e.printStackTrace(); Assert.assertTrue(e.getMessage() - .contains("ScatteGather edge manager must have destination vertex task parallelism specified")); + .contains("ScatterGather edge manager must have destination vertex task parallelism specified")); } when(mockContext.getDestinationVertexNumTasks()).thenReturn(0); manager.getNumSourceTaskPhysicalOutputs(0); @@ -232,9 +232,9 @@ private void verifyEvents(TezTaskAttemptID 
srcTAID, LinkedHashMap<TezTaskID, Task> tasks) { for (Task task : tasks.values()) { - TezTaskID taskID = task.getTaskId(); + TezTaskID taskID = task.getTaskID(); reset(task); - doReturn(taskID).when(task).getTaskId(); + doReturn(taskID).when(task).getTaskID(); } } @@ -243,7 +243,7 @@ private LinkedHashMap<TezTaskID, Task> mockTasks(TezVertexID vertexID, int numTa for (int i = 0 ; i < numTasks ; i++) { Task task = mock(Task.class); TezTaskID taskID = TezTaskID.getInstance(vertexID, i); - doReturn(taskID).when(task).getTaskId(); + doReturn(taskID).when(task).getTaskID(); tasks.put(taskID, task); } return tasks; diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestImmediateStartVertexManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestImmediateStartVertexManager.java index a17c7c5b45..67a9da52b1 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestImmediateStartVertexManager.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestImmediateStartVertexManager.java @@ -37,7 +37,7 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; -import static org.mockito.Matchers.anyList; +import static org.mockito.Mockito.anyList; import static org.mockito.Mockito.anySet; import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.doAnswer; diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestRootInputVertexManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestRootInputVertexManager.java index 16a97d4374..144d28d4a5 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestRootInputVertexManager.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestRootInputVertexManager.java @@ -23,11 +23,11 @@ import static org.apache.tez.dag.app.dag.impl.RootInputVertexManager.TEZ_ROOT_INPUT_VERTEX_MANAGER_MIN_SRC_FRACTION; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyList; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyList; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -161,7 +161,7 @@ public void testRootInputVertexManagerSlowStart() { VertexManagerPluginContext mockContext = mock(VertexManagerPluginContext.class); - when(mockContext.getVertexStatistics(any(String.class))) + when(mockContext.getVertexStatistics(any())) .thenReturn(mock(VertexStatistics.class)); when(mockContext.getInputVertexEdgeProperties()) .thenReturn(mockInputVertices); @@ -508,7 +508,7 @@ public void testTezDrainCompletionsOnVertexStart() throws IOException { VertexManagerPluginContext mockContext = mock(VertexManagerPluginContext.class); - when(mockContext.getVertexStatistics(any(String.class))) + when(mockContext.getVertexStatistics(any())) .thenReturn(mock(VertexStatistics.class)); when(mockContext.getInputVertexEdgeProperties()) .thenReturn(mockInputVertices); @@ -568,6 +568,7 @@ public ScheduledTasksAnswer(List<VertexManagerPluginContext.ScheduleTaskRequest> scheduledTasks) { public Object answer(InvocationOnMock invocation) throws IOException { Object[] args = invocation.getArguments(); scheduledTasks.clear(); + @SuppressWarnings("unchecked") List<VertexManagerPluginContext.ScheduleTaskRequest> tasks = (List<VertexManagerPluginContext.ScheduleTaskRequest>)args[0]; for (VertexManagerPluginContext.ScheduleTaskRequest task : tasks) { diff --git
a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java index a9d0c8d3df..34a57a5f6b 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskAttempt.java @@ -18,10 +18,15 @@ package org.apache.tez.dag.app.dag.impl; +import org.apache.hadoop.yarn.util.MonotonicClock; +import org.apache.tez.common.counters.DAGCounter; +import org.apache.tez.dag.app.MockClock; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.fail; -import static org.mockito.Matchers.anyInt; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; @@ -62,6 +67,7 @@ import org.apache.log4j.Level; import org.apache.log4j.LogManager; import org.apache.tez.common.MockDNSToSwitchMapping; +import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.TezConstants; import org.apache.tez.dag.app.dag.event.TaskAttemptEventSubmitted; import org.apache.tez.dag.app.dag.event.TaskEventTAFailed; @@ -80,6 +86,8 @@ import org.apache.tez.dag.app.TaskCommunicatorWrapper; import org.apache.tez.dag.app.TaskHeartbeatHandler; import org.apache.tez.dag.app.dag.TaskAttemptStateInternal; +import org.apache.tez.dag.app.dag.DAG; +import org.apache.tez.dag.app.dag.Task; import org.apache.tez.dag.app.dag.Vertex; import org.apache.tez.dag.app.dag.event.DAGEvent; import org.apache.tez.dag.app.dag.event.DAGEventCounterUpdate; @@ -104,6 +112,7 @@ import org.apache.tez.dag.app.rm.AMSchedulerEventTALaunchRequest; import org.apache.tez.dag.app.rm.container.AMContainerMap; import org.apache.tez.dag.app.rm.container.ContainerContextMatcher; +import org.apache.tez.dag.app.rm.node.AMNodeTracker; import org.apache.tez.dag.history.DAGHistoryEvent; import org.apache.tez.dag.history.HistoryEventHandler; import org.apache.tez.dag.history.events.TaskAttemptFinishedEvent; @@ -123,14 +132,12 @@ import org.junit.BeforeClass; import org.junit.Test; import org.mockito.ArgumentCaptor; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import com.google.common.collect.Maps; @SuppressWarnings({ "unchecked", "rawtypes" }) public class TestTaskAttempt { - private static final Logger LOG = LoggerFactory.getLogger(TestTaskAttempt.class); - static public class StubbedFS extends RawLocalFileSystem { @Override public FileStatus getFileStatus(Path f) throws IOException { @@ -142,6 +149,7 @@ public FileStatus getFileStatus(Path f) throws IOException { TezConfiguration vertexConf = new TezConfiguration(); TaskLocationHint locationHint; Vertex mockVertex; + Task mockTask; ServicePluginInfo servicePluginInfo = new ServicePluginInfo() .setContainerLauncherName(TezConstants.getTezYarnServicePluginName()); @@ -157,15 +165,29 @@ public void setupTest() { when(appCtx.getContainerLauncherName(anyInt())).thenReturn( TezConstants.getTezYarnServicePluginName()); - mockVertex = mock(Vertex.class); - when(mockVertex.getServicePluginInfo()).thenReturn(servicePluginInfo); - when(mockVertex.getVertexConfig()).thenReturn(new VertexImpl.VertexConfigImpl(vertexConf)); + createMockVertex(vertexConf); + mockTask = mock(Task.class); + when(mockTask.getVertex()).thenReturn(mockVertex); HistoryEventHandler mockHistHandler = 
mock(HistoryEventHandler.class); doReturn(mockHistHandler).when(appCtx).getHistoryHandler(); LogManager.getRootLogger().setLevel(Level.DEBUG); } + private void createMockVertex(Configuration conf) { + mockVertex = mock(Vertex.class); + when(mockVertex.getDownstreamBlamingHosts()).thenReturn(Maps.newHashMap()); + when(mockVertex.getServicePluginInfo()).thenReturn(servicePluginInfo); + when(mockVertex.getVertexConfig()).thenReturn(new VertexImpl.VertexConfigImpl(conf)); + AppContext appContext = mock(AppContext.class); + when(appContext.getTaskScheduerIdentifier(anyString())).thenReturn(0); + when(mockVertex.getAppContext()).thenReturn(appContext); + AMNodeTracker nodeTracker = mock(AMNodeTracker.class); + when(nodeTracker.getNumNodes(anyInt())).thenReturn(10); + when(nodeTracker.getNumActiveNodes(anyInt())).thenReturn(8); + when(appContext.getNodeTracker()).thenReturn(nodeTracker); + } + @Test(timeout = 5000) public void testLocalityRequest() { TaskAttemptImpl.ScheduleTaskattemptTransition sta = @@ -212,8 +234,14 @@ public void testRetriesAtSamePriorityConfig() { // Override the test defaults to setup the config change TezConfiguration vertexConf = new TezConfiguration(); vertexConf.setBoolean(TezConfiguration.TEZ_AM_TASK_RESCHEDULE_HIGHER_PRIORITY, false); + vertexConf.setBoolean(TezConfiguration.TEZ_AM_TASK_RESCHEDULE_RELAXED_LOCALITY, true); when(mockVertex.getVertexConfig()).thenReturn(new VertexImpl.VertexConfigImpl(vertexConf)); + // set locality + Set hosts = new TreeSet(); + hosts.add("host1"); + locationHint = TaskLocationHint.createTaskLocationHint(hosts, null); + TaskAttemptImpl.ScheduleTaskattemptTransition sta = new TaskAttemptImpl.ScheduleTaskattemptTransition(); @@ -241,12 +269,15 @@ public void testRetriesAtSamePriorityConfig() { verify(eventHandler, times(1)).handle(arg.capture()); AMSchedulerEventTALaunchRequest launchEvent = (AMSchedulerEventTALaunchRequest) arg.getValue(); Assert.assertEquals(2, launchEvent.getPriority()); + Assert.assertEquals(1, launchEvent.getLocationHint().getHosts().size()); + Assert.assertTrue(launchEvent.getLocationHint().getHosts().contains("host1")); // Verify priority for a retried attempt is the same sta.transition(taImplReScheduled, sEvent); verify(eventHandler, times(2)).handle(arg.capture()); launchEvent = (AMSchedulerEventTALaunchRequest) arg.getValue(); Assert.assertEquals(2, launchEvent.getPriority()); + Assert.assertNull(launchEvent.getLocationHint()); } @Test(timeout = 5000) @@ -393,7 +424,7 @@ taListener, taskConf, new SystemClock(), taImpl.handle(new TaskAttemptEventKillRequest(taskAttemptID, null, TaskAttemptTerminationCause.TERMINATED_BY_CLIENT)); assertEquals(TaskAttemptStateInternal.KILL_IN_PROGRESS, taImpl.getInternalState()); - taImpl.handle(new TaskAttemptEventTezEventUpdate(taImpl.getID(), Collections.EMPTY_LIST)); + taImpl.handle(new TaskAttemptEventTezEventUpdate(taImpl.getTaskAttemptID(), Collections.EMPTY_LIST)); assertFalse( "InternalError occurred trying to handle TA_TEZ_EVENT_UPDATE in KILL_IN_PROGRESS state", eventHandler.internalError); @@ -448,7 +479,7 @@ public void testContainerTerminationWhileRunning() throws Exception { taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); @@ -479,27 +510,27 @@ taListener, 
taskConf, new SystemClock(), assertEquals("Terminating", taImpl.getDiagnostics().get(0)); assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, taImpl.getTerminationCause()); - int expectedEvenstAfterTerminating = expectedEventsAtRunning + 3; + int expectedEventsAfterTerminating = expectedEventsAtRunning + 3; arg = ArgumentCaptor.forClass(Event.class); - verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture()); + verify(eventHandler, times(expectedEventsAfterTerminating)).handle(arg.capture()); Event event = verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), TaskEventTAFailed.class, 1); + expectedEventsAfterTerminating), TaskEventTAFailed.class, 1); TaskEventTAFailed failedEvent = (TaskEventTAFailed) event; assertEquals(TaskFailureType.NON_FATAL, failedEvent.getTaskFailureType()); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1); + expectedEventsAfterTerminating), AMSchedulerEventTAEnded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1); + expectedEventsAfterTerminating), DAGEventCounterUpdate.class, 1); taImpl.handle(new TaskAttemptEventContainerTerminated(contId, taskAttemptID, "Terminated", TaskAttemptTerminationCause.CONTAINER_EXITED)); // verify unregister is not invoked again verify(mockHeartbeatHandler, times(1)).unregister(taskAttemptID); - int expectedEventAfterTerminated = expectedEvenstAfterTerminating + 0; + int expectedEventAfterTerminated = expectedEventsAfterTerminating + 0; arg = ArgumentCaptor.forClass(Event.class); verify(eventHandler, times(expectedEventAfterTerminated)).handle(arg.capture()); @@ -553,7 +584,7 @@ public void testContainerTerminatedWhileRunning() throws Exception { taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); taImpl.handle(new TaskAttemptEventSubmitted(taskAttemptID, contId)); @@ -574,27 +605,27 @@ taListener, taskConf, new SystemClock(), assertEquals(TaskAttemptTerminationCause.CONTAINER_EXITED, taImpl.getTerminationCause()); // TODO Ensure TA_TERMINATING after this is ignored.
- int expectedEvenstAfterTerminating = expectedEventsAtRunning + 3; + int expectedEventsAfterTerminating = expectedEventsAtRunning + 3; arg = ArgumentCaptor.forClass(Event.class); - verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture()); + verify(eventHandler, times(expectedEventsAfterTerminating)).handle(arg.capture()); Event event = verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), TaskEventTAFailed.class, 1); + expectedEventsAfterTerminating), TaskEventTAFailed.class, 1); TaskEventTAFailed failedEvent = (TaskEventTAFailed) event; assertEquals(TaskFailureType.NON_FATAL, failedEvent.getTaskFailureType()); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1); + expectedEventsAfterTerminating), AMSchedulerEventTAEnded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1); + expectedEventsAfterTerminating), DAGEventCounterUpdate.class, 1); taImpl.handle(new TaskAttemptEventContainerTerminated(contId, taskAttemptID, "Terminated", TaskAttemptTerminationCause.CONTAINER_EXITED)); // verify unregister is not invoked again verify(mockHeartbeatHandler, times(1)).unregister(taskAttemptID); - int expectedEventAfterTerminated = expectedEvenstAfterTerminating + 0; + int expectedEventAfterTerminated = expectedEventsAfterTerminating + 0; arg = ArgumentCaptor.forClass(Event.class); verify(eventHandler, times(expectedEventAfterTerminated)).handle(arg.capture()); } @@ -642,7 +673,7 @@ public void testContainerTerminatedAfterSuccess() throws Exception { taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); @@ -662,25 +693,25 @@ taListener, taskConf, new SystemClock(), verify(mockHeartbeatHandler).unregister(taskAttemptID); assertEquals(0, taImpl.getDiagnostics().size()); - int expectedEvenstAfterTerminating = expectedEventsAtRunning + 3; + int expectedEventsAfterTerminating = expectedEventsAtRunning + 3; arg = ArgumentCaptor.forClass(Event.class); - verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture()); + verify(eventHandler, times(expectedEventsAfterTerminating)).handle(arg.capture()); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), TaskEventTASucceeded.class, 1); + expectedEventsAfterTerminating), TaskEventTASucceeded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1); + expectedEventsAfterTerminating), AMSchedulerEventTAEnded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1); + expectedEventsAfterTerminating), DAGEventCounterUpdate.class, 1); taImpl.handle(new TaskAttemptEventContainerTerminated(contId, taskAttemptID, "Terminated", TaskAttemptTerminationCause.CONTAINER_EXITED)); // verify unregister is not invoked again verify(mockHeartbeatHandler, times(1)).unregister(taskAttemptID); - int expectedEventAfterTerminated = expectedEvenstAfterTerminating + 0; + int 
expectedEventAfterTerminated = expectedEventsAfterTerminating + 0; arg = ArgumentCaptor.forClass(Event.class); verify(eventHandler, times(expectedEventAfterTerminated)).handle(arg.capture()); @@ -732,7 +763,7 @@ public void testLastDataEventRecording() throws Exception { taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); taImpl.handle(new TaskAttemptEventSubmitted(taskAttemptID, contId)); @@ -817,7 +848,7 @@ public void testFailure() throws Exception { taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); @@ -847,7 +878,7 @@ taListener, taskConf, new SystemClock(), assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, taImpl.getTerminationCause()); assertEquals(TaskAttemptStateInternal.FAIL_IN_PROGRESS, taImpl.getInternalState()); - taImpl.handle(new TaskAttemptEventTezEventUpdate(taImpl.getID(), Collections.EMPTY_LIST)); + taImpl.handle(new TaskAttemptEventTezEventUpdate(taImpl.getTaskAttemptID(), Collections.EMPTY_LIST)); assertFalse( "InternalError occurred trying to handle TA_TEZ_EVENT_UPDATE in FAIL_IN_PROGRESS state", eventHandler.internalError); @@ -861,25 +892,25 @@ taListener, taskConf, new SystemClock(), // err cause does not change assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, taImpl.getTerminationCause()); - int expectedEvenstAfterTerminating = expectedEventsAtRunning + 5; + int expectedEventsAfterTerminating = expectedEventsAtRunning + 5; arg = ArgumentCaptor.forClass(Event.class); - verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture()); + verify(eventHandler, times(expectedEventsAfterTerminating)).handle(arg.capture()); Event e = verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), TaskEventTAFailed.class, 1); + expectedEventsAfterTerminating), TaskEventTAFailed.class, 1); TaskEventTAFailed failedEvent = (TaskEventTAFailed) e; assertEquals(TaskFailureType.NON_FATAL, failedEvent.getTaskFailureType()); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1); + expectedEventsAfterTerminating), AMSchedulerEventTAEnded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1); + expectedEventsAfterTerminating), DAGEventCounterUpdate.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), SpeculatorEventTaskAttemptStatusUpdate.class, 2); + expectedEventsAfterTerminating), SpeculatorEventTaskAttemptStatusUpdate.class, 2); } @Test(timeout = 5000) @@ -924,7 +955,7 @@ public void testFailureFatalError() throws Exception { taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = 
ArgumentCaptor.forClass(Event.class); taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); @@ -954,7 +985,7 @@ taListener, taskConf, new SystemClock(), assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, taImpl.getTerminationCause()); assertEquals(TaskAttemptStateInternal.FAIL_IN_PROGRESS, taImpl.getInternalState()); - taImpl.handle(new TaskAttemptEventTezEventUpdate(taImpl.getID(), Collections.EMPTY_LIST)); + taImpl.handle(new TaskAttemptEventTezEventUpdate(taImpl.getTaskAttemptID(), Collections.EMPTY_LIST)); assertFalse( "InternalError occurred trying to handle TA_TEZ_EVENT_UPDATE in FAIL_IN_PROGRESS state", eventHandler.internalError); @@ -968,24 +999,24 @@ taListener, taskConf, new SystemClock(), // err cause does not change assertEquals(TaskAttemptTerminationCause.APPLICATION_ERROR, taImpl.getTerminationCause()); - int expectedEvenstAfterTerminating = expectedEventsAtRunning + 5; + int expectedEventsAfterTerminating = expectedEventsAtRunning + 5; arg = ArgumentCaptor.forClass(Event.class); - verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture()); + verify(eventHandler, times(expectedEventsAfterTerminating)).handle(arg.capture()); Event e = verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), TaskEventTAFailed.class, 1); + expectedEventsAfterTerminating), TaskEventTAFailed.class, 1); TaskEventTAFailed failedEvent = (TaskEventTAFailed) e; assertEquals(TaskFailureType.FATAL, failedEvent.getTaskFailureType()); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1); + expectedEventsAfterTerminating), AMSchedulerEventTAEnded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1); + expectedEventsAfterTerminating), DAGEventCounterUpdate.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), SpeculatorEventTaskAttemptStatusUpdate.class, 2); + expectedEventsAfterTerminating), SpeculatorEventTaskAttemptStatusUpdate.class, 2); } @Test @@ -1032,7 +1063,7 @@ public void testProgressTimeStampUpdate() throws Exception { taListener, taskConf, mockClock, mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); @@ -1048,7 +1079,7 @@ public void testProgressTimeStampUpdate() throws Exception { verify(eventHandler, atLeast(1)).handle(arg.capture()); if (arg.getValue() instanceof TaskAttemptEventAttemptFailed) { TaskAttemptEventAttemptFailed fEvent = (TaskAttemptEventAttemptFailed) arg.getValue(); - assertEquals(taImpl.getID(), fEvent.getTaskAttemptID()); + assertEquals(taImpl.getTaskAttemptID(), fEvent.getTaskAttemptID()); assertEquals(TaskAttemptTerminationCause.NO_PROGRESS, fEvent.getTerminationCause()); taImpl.handle(fEvent); fail("Should not fail since the timestamps do not differ by progress interval config"); @@ -1063,6 +1094,130 @@ public void testProgressTimeStampUpdate() throws Exception { Assert.assertTrue("This should have been an attempt failed event!", arg.getValue() instanceof TaskAttemptEventAttemptFailed); } + @Test + public void testStatusUpdateWithNullCounters() throws Exception { + ApplicationId 
appId = ApplicationId.newInstance(1, 2); + ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance( + appId, 0); + TezDAGID dagID = TezDAGID.getInstance(appId, 1); + TezVertexID vertexID = TezVertexID.getInstance(dagID, 1); + TezTaskID taskID = TezTaskID.getInstance(vertexID, 1); + + MockEventHandler eventHandler = spy(new MockEventHandler()); + TaskCommunicatorManagerInterface taListener = createMockTaskAttemptListener(); + + Configuration taskConf = new Configuration(); + taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class); + taskConf.setBoolean("fs.file.impl.disable.cache", true); + + locationHint = TaskLocationHint.createTaskLocationHint( + new HashSet(Arrays.asList(new String[]{"127.0.0.1"})), null); + Resource resource = Resource.newInstance(1024, 1); + + NodeId nid = NodeId.newInstance("127.0.0.1", 0); + ContainerId contId = ContainerId.newContainerId(appAttemptId, 3); + Container container = mock(Container.class); + when(container.getId()).thenReturn(contId); + when(container.getNodeId()).thenReturn(nid); + when(container.getNodeHttpAddress()).thenReturn("localhost:0"); + + AMContainerMap containers = new AMContainerMap( + mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), + new ContainerContextMatcher(), appCtx); + containers.addContainerIfNew(container, 0, 0, 0); + + doReturn(new ClusterInfo()).when(appCtx).getClusterInfo(); + doReturn(containers).when(appCtx).getAllContainers(); + + TaskHeartbeatHandler mockHeartbeatHandler = mock(TaskHeartbeatHandler.class); + TaskAttemptImpl taImpl = new MockTaskAttemptImpl(taskID, 1, eventHandler, + taListener, taskConf, new SystemClock(), + mockHeartbeatHandler, appCtx, false, + resource, createFakeContainerContext(), false); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); + + taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); + taImpl.handle(new TaskAttemptEventSubmitted(taskAttemptID, contId)); + taImpl.handle(new TaskAttemptEventStartedRemotely(taskAttemptID)); + assertEquals("Task attempt is not in the RUNNING state", taImpl.getState(), TaskAttemptState.RUNNING); + verify(mockHeartbeatHandler).register(taskAttemptID); + + TezCounters counters = new TezCounters(); + counters.findCounter("group", "counter").increment(1); + taImpl.handle(new TaskAttemptEventStatusUpdate( + taskAttemptID, new TaskStatusUpdateEvent(counters, 0.1f, null, false))); + assertEquals(1, taImpl.getCounters().findCounter("group", "counter").getValue()); + taImpl.handle(new TaskAttemptEventStatusUpdate( + taskAttemptID, new TaskStatusUpdateEvent(null, 0.1f, null, false))); + assertEquals(1, taImpl.getCounters().findCounter("group", "counter").getValue()); + counters.findCounter("group", "counter").increment(1); + taImpl.handle(new TaskAttemptEventStatusUpdate( + taskAttemptID, new TaskStatusUpdateEvent(counters, 0.1f, null, false))); + assertEquals(2, taImpl.getCounters().findCounter("group", "counter").getValue()); + taImpl.handle(new TaskAttemptEventStatusUpdate( + taskAttemptID, new TaskStatusUpdateEvent(null, 0.1f, null, false))); + assertEquals(2, taImpl.getCounters().findCounter("group", "counter").getValue()); + } + + @Test (timeout = 60000L) + public void testProgressAfterSubmit() throws Exception { + ApplicationId appId = ApplicationId.newInstance(1, 2); + ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance( + appId, 0); + TezDAGID dagID = TezDAGID.getInstance(appId, 1); + TezVertexID vertexID = TezVertexID.getInstance(dagID, 1); + TezTaskID taskID = 
TezTaskID.getInstance(vertexID, 1); + + MockEventHandler eventHandler = spy(new MockEventHandler()); + TaskCommunicatorManagerInterface taListener = createMockTaskAttemptListener(); + + Configuration taskConf = new Configuration(); + taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class); + taskConf.setBoolean("fs.file.impl.disable.cache", true); + taskConf.setLong(TezConfiguration.TEZ_TASK_PROGRESS_STUCK_INTERVAL_MS, 50); + + locationHint = TaskLocationHint.createTaskLocationHint( + new HashSet(Arrays.asList(new String[]{"127.0.0.1"})), null); + Resource resource = Resource.newInstance(1024, 1); + + NodeId nid = NodeId.newInstance("127.0.0.1", 0); + @SuppressWarnings("deprecation") + ContainerId contId = ContainerId.newInstance(appAttemptId, 3); + Container container = mock(Container.class); + when(container.getId()).thenReturn(contId); + when(container.getNodeId()).thenReturn(nid); + when(container.getNodeHttpAddress()).thenReturn("localhost:0"); + + AMContainerMap containers = new AMContainerMap( + mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), + new ContainerContextMatcher(), appCtx); + containers.addContainerIfNew(container, 0, 0, 0); + + doReturn(new ClusterInfo()).when(appCtx).getClusterInfo(); + doReturn(containers).when(appCtx).getAllContainers(); + + TaskHeartbeatHandler mockHeartbeatHandler = mock(TaskHeartbeatHandler.class); + MockClock mockClock = new MockClock(); + TaskAttemptImpl taImpl = new MockTaskAttemptImpl(taskID, 1, eventHandler, + taListener, taskConf, mockClock, + mockHeartbeatHandler, appCtx, false, + resource, createFakeContainerContext(), false); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + mockClock.incrementTime(20L); + taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); + mockClock.incrementTime(55L); + taImpl.handle(new TaskAttemptEventSubmitted(taskAttemptID, contId)); + taImpl.handle(new TaskAttemptEventStatusUpdate( + taskAttemptID, new TaskStatusUpdateEvent(null, 0.1f, null, false))); + verify(eventHandler, atLeast(1)).handle(arg.capture()); + if (arg.getValue() instanceof TaskAttemptEvent) { + taImpl.handle((TaskAttemptEvent) arg.getValue()); + } + Assert.assertEquals("Task Attempt's internal state should be SUBMITTED!", + taImpl.getInternalState(), TaskAttemptStateInternal.SUBMITTED); + } + @Test (timeout = 5000) public void testNoProgressFail() throws Exception { ApplicationId appId = ApplicationId.newInstance(1, 2); @@ -1106,7 +1261,7 @@ public void testNoProgressFail() throws Exception { taListener, taskConf, mockClock, mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); @@ -1139,7 +1294,7 @@ public void testNoProgressFail() throws Exception { // failed event sent to self verify(eventHandler, atLeast(1)).handle(arg.capture()); TaskAttemptEventAttemptFailed fEvent = (TaskAttemptEventAttemptFailed) arg.getValue(); - assertEquals(taImpl.getID(), fEvent.getTaskAttemptID()); + assertEquals(taImpl.getTaskAttemptID(), fEvent.getTaskAttemptID()); assertEquals(TaskAttemptTerminationCause.NO_PROGRESS, fEvent.getTerminationCause()); assertEquals(TaskFailureType.NON_FATAL, fEvent.getTaskFailureType()); taImpl.handle(fEvent); @@ -1221,7 +1376,7 @@ public 
void testCompletedAtSubmitted() throws ServicePluginException { taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); @@ -1245,21 +1400,21 @@ taListener, taskConf, new SystemClock(), verify(mockHeartbeatHandler).unregister(taskAttemptID); assertEquals(0, taImpl.getDiagnostics().size()); - int expectedEvenstAfterTerminating = expectedEventsAtStarting + 3; + int expectedEventsAfterTerminating = expectedEventsAtStarting + 3; arg = ArgumentCaptor.forClass(Event.class); - verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture()); + verify(eventHandler, times(expectedEventsAfterTerminating)).handle(arg.capture()); Event e = verifyEventType( arg.getAllValues().subList(expectedEventsAtStarting, - expectedEvenstAfterTerminating), TaskEventTASucceeded.class, 1); + expectedEventsAfterTerminating), TaskEventTASucceeded.class, 1); assertEquals(TaskEventType.T_ATTEMPT_SUCCEEDED, e.getType()); verifyEventType( arg.getAllValues().subList(expectedEventsAtStarting, - expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1); + expectedEventsAfterTerminating), AMSchedulerEventTAEnded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtStarting, - expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1); + expectedEventsAfterTerminating), DAGEventCounterUpdate.class, 1); } @Test(timeout = 5000) @@ -1304,7 +1459,7 @@ public void testSuccess() throws Exception { taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); @@ -1330,23 +1485,23 @@ taListener, taskConf, new SystemClock(), verify(mockHeartbeatHandler).unregister(taskAttemptID); assertEquals(0, taImpl.getDiagnostics().size()); - int expectedEvenstAfterTerminating = expectedEventsAtRunning + 5; + int expectedEventsAfterTerminating = expectedEventsAtRunning + 5; arg = ArgumentCaptor.forClass(Event.class); - verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture()); + verify(eventHandler, times(expectedEventsAfterTerminating)).handle(arg.capture()); Event e = verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), TaskEventTASucceeded.class, 1); + expectedEventsAfterTerminating), TaskEventTASucceeded.class, 1); assertEquals(TaskEventType.T_ATTEMPT_SUCCEEDED, e.getType()); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1); + expectedEventsAfterTerminating), AMSchedulerEventTAEnded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1); + expectedEventsAfterTerminating), DAGEventCounterUpdate.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), SpeculatorEventTaskAttemptStatusUpdate.class, 2); + expectedEventsAfterTerminating), SpeculatorEventTaskAttemptStatusUpdate.class, 2); } 
@Test(timeout = 5000) @@ -1392,7 +1547,7 @@ public void testContainerPreemptedAfterSuccess() throws Exception { taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); @@ -1484,7 +1639,7 @@ public void testNodeFailedNonLeafVertex() throws Exception { taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); @@ -1505,20 +1660,20 @@ taListener, taskConf, new SystemClock(), verify(mockHeartbeatHandler).unregister(taskAttemptID); assertEquals(0, taImpl.getDiagnostics().size()); - int expectedEvenstAfterTerminating = expectedEventsAtRunning + 3; + int expectedEventsAfterTerminating = expectedEventsAtRunning + 3; arg = ArgumentCaptor.forClass(Event.class); - verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture()); + verify(eventHandler, times(expectedEventsAfterTerminating)).handle(arg.capture()); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), TaskEventTASucceeded.class, 1); + expectedEventsAfterTerminating), TaskEventTASucceeded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1); + expectedEventsAfterTerminating), AMSchedulerEventTAEnded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1); + expectedEventsAfterTerminating), DAGEventCounterUpdate.class, 1); // Send out a Node Failure. taImpl.handle(new TaskAttemptEventNodeFailed(taskAttemptID, "NodeDecomissioned", @@ -1530,11 +1685,11 @@ taListener, taskConf, new SystemClock(), verify(mockHeartbeatHandler, times(1)).unregister(taskAttemptID); assertEquals(true, taImpl.inputFailedReported); // Verify one event to the Task informing it about FAILURE. No events to scheduler. Counter event. 
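    // (Concretely: expectedEventsNodeFailure = expectedEventsAfterTerminating + 2,
    // i.e. one TaskEventTAKilled to the Task plus one DAGEventCounterUpdate. The
    // already-SUCCEEDED attempt is re-killed because a non-leaf vertex's output
    // lived on the lost node and downstream consumers can no longer fetch it.)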
- int expectedEventsNodeFailure = expectedEvenstAfterTerminating + 2; + int expectedEventsNodeFailure = expectedEventsAfterTerminating + 2; arg = ArgumentCaptor.forClass(Event.class); verify(eventHandler, times(expectedEventsNodeFailure)).handle(arg.capture()); verifyEventType( - arg.getAllValues().subList(expectedEvenstAfterTerminating, + arg.getAllValues().subList(expectedEventsAfterTerminating, expectedEventsNodeFailure), TaskEventTAKilled.class, 1); // Verify still in KILLED state @@ -1543,7 +1698,7 @@ taListener, taskConf, new SystemClock(), assertEquals(TaskAttemptTerminationCause.NODE_FAILED, taImpl.getTerminationCause()); assertEquals(TaskAttemptStateInternal.KILLED, taImpl.getInternalState()); - taImpl.handle(new TaskAttemptEventTezEventUpdate(taImpl.getID(), Collections.EMPTY_LIST)); + taImpl.handle(new TaskAttemptEventTezEventUpdate(taImpl.getTaskAttemptID(), Collections.EMPTY_LIST)); assertFalse( "InternalError occurred trying to handle TA_TEZ_EVENT_UPDATE in KILLED state", eventHandler.internalError); @@ -1591,7 +1746,7 @@ public void testNodeFailedLeafVertex() throws Exception { taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), true); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); @@ -1612,26 +1767,26 @@ taListener, taskConf, new SystemClock(), verify(mockHeartbeatHandler).unregister(taskAttemptID); assertEquals(0, taImpl.getDiagnostics().size()); - int expectedEvenstAfterTerminating = expectedEventsAtRunning + 3; + int expectedEventsAfterTerminating = expectedEventsAtRunning + 3; arg = ArgumentCaptor.forClass(Event.class); - verify(eventHandler, times(expectedEvenstAfterTerminating)).handle(arg.capture()); + verify(eventHandler, times(expectedEventsAfterTerminating)).handle(arg.capture()); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), TaskEventTASucceeded.class, 1); + expectedEventsAfterTerminating), TaskEventTASucceeded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), AMSchedulerEventTAEnded.class, 1); + expectedEventsAfterTerminating), AMSchedulerEventTAEnded.class, 1); verifyEventType( arg.getAllValues().subList(expectedEventsAtRunning, - expectedEvenstAfterTerminating), DAGEventCounterUpdate.class, 1); + expectedEventsAfterTerminating), DAGEventCounterUpdate.class, 1); // Send out a Node Failure. 
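    // (Leaf-vertex case: the attempt has already SUCCEEDED and a leaf vertex has
    // no downstream consumers that would need to re-fetch its output, so the node
    // failure below must not generate any additional events, in contrast to the
    // non-leaf test above.)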
taImpl.handle(new TaskAttemptEventNodeFailed(taskAttemptID, "NodeDecomissioned", TaskAttemptTerminationCause.NODE_FAILED)); // Verify no additional events - int expectedEventsNodeFailure = expectedEvenstAfterTerminating + 0; + int expectedEventsNodeFailure = expectedEventsAfterTerminating + 0; arg = ArgumentCaptor.forClass(Event.class); verify(eventHandler, times(expectedEventsNodeFailure)).handle(arg.capture()); @@ -1684,13 +1839,14 @@ public void testMultipleOutputFailed() throws Exception { doReturn(containers).when(appCtx).getAllContainers(); HistoryEventHandler mockHistHandler = mock(HistoryEventHandler.class); doReturn(mockHistHandler).when(appCtx).getHistoryHandler(); + DAGImpl mockDAG = mock(DAGImpl.class); TaskHeartbeatHandler mockHeartbeatHandler = mock(TaskHeartbeatHandler.class); MockTaskAttemptImpl taImpl = new MockTaskAttemptImpl(taskID, 1, eventHandler, taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID = taImpl.getID(); + TezTaskAttemptID taskAttemptID = taImpl.getTaskAttemptID(); taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0)); taImpl.handle(new TaskAttemptEventSubmitted(taskAttemptID, contId)); @@ -1716,6 +1872,15 @@ taListener, taskConf, new SystemClock(), EventMetaData mockMeta = mock(EventMetaData.class); TezTaskAttemptID mockDestId1 = mock(TezTaskAttemptID.class); when(mockMeta.getTaskAttemptID()).thenReturn(mockDestId1); + TezTaskID destTaskID = mock(TezTaskID.class); + TezVertexID destVertexID = mock(TezVertexID.class); + when(mockDestId1.getTaskID()).thenReturn(destTaskID); + when(mockDestId1.getVertexID()).thenReturn(destVertexID); + when(destTaskID.getVertexID()).thenReturn(destVertexID); + Vertex destVertex = mock(VertexImpl.class); + when(destVertex.getRunningTasks()).thenReturn(11); + when(mockDAG.getVertex(destVertexID)).thenReturn(destVertex); + when(appCtx.getCurrentDAG()).thenReturn(mockDAG); TezEvent tzEvent = new TezEvent(mockReEvent, mockMeta); taImpl.handle(new TaskAttemptEventOutputFailed(taskAttemptID, tzEvent, 11)); @@ -1732,11 +1897,18 @@ taListener, taskConf, new SystemClock(), // different destination attempt reports error. 
now threshold crossed TezTaskAttemptID mockDestId2 = mock(TezTaskAttemptID.class); - when(mockMeta.getTaskAttemptID()).thenReturn(mockDestId2); + when(mockMeta.getTaskAttemptID()).thenReturn(mockDestId2); + destTaskID = mock(TezTaskID.class); + destVertexID = mock(TezVertexID.class); + when(mockDestId2.getTaskID()).thenReturn(destTaskID); + when(mockDestId2.getVertexID()).thenReturn(destVertexID); + when(destTaskID.getVertexID()).thenReturn(destVertexID); + destVertex = mock(VertexImpl.class); + when(destVertex.getRunningTasks()).thenReturn(11); + when(mockDAG.getVertex(destVertexID)).thenReturn(destVertex); taImpl.handle(new TaskAttemptEventOutputFailed(taskAttemptID, tzEvent, 11)); - assertEquals("Task attempt is not in FAILED state", taImpl.getState(), - TaskAttemptState.FAILED); + assertEquals("Task attempt is not in FAILED state", TaskAttemptState.FAILED, taImpl.getState()); assertEquals(TaskAttemptTerminationCause.OUTPUT_LOST, taImpl.getTerminationCause()); // verify unregister is not invoked again verify(mockHeartbeatHandler, times(1)).unregister(taskAttemptID); @@ -1749,7 +1921,7 @@ taListener, taskConf, new SystemClock(), assertEquals(true, taImpl.inputFailedReported); int expectedEventsAfterFetchFailure = expectedEventsTillSucceeded + 2; - arg.getAllValues().clear(); + arg = ArgumentCaptor.forClass(Event.class); verify(eventHandler, times(expectedEventsAfterFetchFailure)).handle(arg.capture()); Event e = verifyEventType( arg.getAllValues().subList(expectedEventsTillSucceeded, @@ -1767,13 +1939,17 @@ taListener, taskConf, new SystemClock(), verify(eventHandler, times(expectedEventsAfterFetchFailure)).handle( arg.capture()); - taskConf.setInt(TezConfiguration.TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES, 1); + Configuration newVertexConf = new Configuration(vertexConf); + newVertexConf.setInt(TezConfiguration.TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES, + 1); + createMockVertex(newVertexConf); + TezTaskID taskID2 = TezTaskID.getInstance(vertexID, 2); MockTaskAttemptImpl taImpl2 = new MockTaskAttemptImpl(taskID2, 1, eventHandler, taListener, taskConf, new SystemClock(), mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID2 = taImpl2.getID(); + TezTaskAttemptID taskAttemptID2 = taImpl2.getTaskAttemptID(); taImpl2.handle(new TaskAttemptEventSchedule(taskAttemptID2, 0, 0)); taImpl2.handle(new TaskAttemptEventSubmitted(taskAttemptID2, contId)); @@ -1787,6 +1963,8 @@ taListener, taskConf, new SystemClock(), mockReEvent = InputReadErrorEvent.create("", 1, 1); mockMeta = mock(EventMetaData.class); mockDestId1 = mock(TezTaskAttemptID.class); + when(mockDestId1.getTaskID()).thenReturn(destTaskID); + when(mockDestId1.getVertexID()).thenReturn(destVertexID); when(mockMeta.getTaskAttemptID()).thenReturn(mockDestId1); tzEvent = new TezEvent(mockReEvent, mockMeta); //This should fail even when MAX_ALLOWED_OUTPUT_FAILURES_FRACTION is within limits, as @@ -1800,14 +1978,21 @@ taListener, taskConf, new SystemClock(), Clock mockClock = mock(Clock.class); int readErrorTimespanSec = 1; - taskConf.setInt(TezConfiguration.TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES, 10); - taskConf.setInt(TezConfiguration.TEZ_AM_MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC, readErrorTimespanSec); + + newVertexConf = new Configuration(vertexConf); + newVertexConf.setInt(TezConfiguration.TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES, + 10); + newVertexConf.setInt( + TezConfiguration.TEZ_AM_MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC, + readErrorTimespanSec); + createMockVertex(newVertexConf); + 
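    // Why the test rebuilds the mock vertex instead of mutating taskConf: the
    // output-failure limits set above (TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES and
    // TEZ_AM_MAX_ALLOWED_TIME_FOR_TASK_READ_ERROR_SEC) are resolved through the
    // vertex's config wrapper, roughly along the lines of (accessor shape
    // assumed for illustration, not quoted from TaskAttemptImpl):
    //   vertex.getVertexConfig().getMaxAllowedOutputFailures();
    // so new Configuration values only take effect once createMockVertex(...)
    // installs a fresh vertex config.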
TezTaskID taskID3 = TezTaskID.getInstance(vertexID, 3); MockTaskAttemptImpl taImpl3 = new MockTaskAttemptImpl(taskID3, 1, eventHandler, taListener, taskConf, mockClock, mockHeartbeatHandler, appCtx, false, resource, createFakeContainerContext(), false); - TezTaskAttemptID taskAttemptID3 = taImpl3.getID(); + TezTaskAttemptID taskAttemptID3 = taImpl3.getTaskAttemptID(); taImpl3.handle(new TaskAttemptEventSchedule(taskAttemptID3, 0, 0)); taImpl3.handle(new TaskAttemptEventSubmitted(taskAttemptID3, contId)); @@ -1821,9 +2006,12 @@ taListener, taskConf, new SystemClock(), mockReEvent = InputReadErrorEvent.create("", 1, 1); mockMeta = mock(EventMetaData.class); mockDestId1 = mock(TezTaskAttemptID.class); + when(mockDestId1.getTaskID()).thenReturn(destTaskID); + when(mockDestId1.getVertexID()).thenReturn(destVertexID); when(mockMeta.getTaskAttemptID()).thenReturn(mockDestId1); tzEvent = new TezEvent(mockReEvent, mockMeta); when(mockClock.getTime()).thenReturn(1000L); + when(destVertex.getRunningTasks()).thenReturn(1000); // time deadline not exceeded for a couple of read error events taImpl3.handle(new TaskAttemptEventOutputFailed(taskAttemptID3, tzEvent, 1000)); assertEquals("Task attempt is not in succeeded state", taImpl3.getState(), @@ -1842,6 +2030,93 @@ taListener, taskConf, new SystemClock(), verify(mockHeartbeatHandler, times(1)).unregister(taskAttemptID3); } + @Test(timeout = 60000) + public void testTAFailureBasedOnRunningTasks() throws Exception { + ApplicationId appId = ApplicationId.newInstance(1, 2); + ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance( + appId, 0); + TezDAGID dagID = TezDAGID.getInstance(appId, 1); + TezVertexID vertexID = TezVertexID.getInstance(dagID, 1); + TezTaskID taskID = TezTaskID.getInstance(vertexID, 1); + + MockEventHandler mockEh = new MockEventHandler(); + MockEventHandler eventHandler = spy(mockEh); + TaskCommunicatorManagerInterface taListener = createMockTaskAttemptListener(); + + Configuration taskConf = new Configuration(); + taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class); + taskConf.setBoolean("fs.file.impl.disable.cache", true); + + locationHint = TaskLocationHint.createTaskLocationHint( + new HashSet(Arrays.asList(new String[]{"127.0.0.1"})), null); + Resource resource = Resource.newInstance(1024, 1); + + NodeId nid = NodeId.newInstance("127.0.0.1", 0); + @SuppressWarnings("deprecation") + ContainerId contId = ContainerId.newInstance(appAttemptId, 3); + Container container = mock(Container.class); + when(container.getId()).thenReturn(contId); + when(container.getNodeId()).thenReturn(nid); + when(container.getNodeHttpAddress()).thenReturn("localhost:0"); + + AMContainerMap containers = new AMContainerMap( + mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), + new ContainerContextMatcher(), appCtx); + containers.addContainerIfNew(container, 0, 0, 0); + + doReturn(new ClusterInfo()).when(appCtx).getClusterInfo(); + doReturn(containers).when(appCtx).getAllContainers(); + HistoryEventHandler mockHistHandler = mock(HistoryEventHandler.class); + doReturn(mockHistHandler).when(appCtx).getHistoryHandler(); + DAGImpl mockDAG = mock(DAGImpl.class); + + TaskHeartbeatHandler mockHeartbeatHandler = mock(TaskHeartbeatHandler.class); + MockTaskAttemptImpl taImpl = new MockTaskAttemptImpl(taskID, 1, eventHandler, + taListener, taskConf, new SystemClock(), + mockHeartbeatHandler, appCtx, false, + resource, createFakeContainerContext(), false); + TezTaskAttemptID taskAttemptID = 
taImpl.getTaskAttemptID();
+
+    taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0));
+    taImpl.handle(new TaskAttemptEventSubmitted(taskAttemptID, contId));
+    taImpl.handle(new TaskAttemptEventStartedRemotely(taskAttemptID));
+    verify(mockHeartbeatHandler).register(taskAttemptID);
+    taImpl.handle(new TaskAttemptEvent(taskAttemptID,
+        TaskAttemptEventType.TA_DONE));
+    assertEquals("Task attempt is not in succeeded state",
+        TaskAttemptState.SUCCEEDED, taImpl.getState());
+    verify(mockHeartbeatHandler).unregister(taskAttemptID);
+
+    int expectedEventsTillSucceeded = 8;
+    ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
+    ArgumentCaptor<DAGHistoryEvent> histArg = ArgumentCaptor.forClass(DAGHistoryEvent.class);
+    verify(eventHandler, times(expectedEventsTillSucceeded)).handle(arg.capture());
+    verify(mockHistHandler, times(2)).handle(histArg.capture()); // start and finish
+    DAGHistoryEvent histEvent = histArg.getValue();
+    TaskAttemptFinishedEvent finishEvent = (TaskAttemptFinishedEvent)histEvent.getHistoryEvent();
+    long finishTime = finishEvent.getFinishTime();
+    verifyEventType(arg.getAllValues(), TaskEventTAUpdate.class, 2);
+
+    InputReadErrorEvent mockReEvent = InputReadErrorEvent.create("", 0, 1);
+    EventMetaData mockMeta = mock(EventMetaData.class);
+    TezTaskAttemptID mockDestId1 = mock(TezTaskAttemptID.class);
+    when(mockMeta.getTaskAttemptID()).thenReturn(mockDestId1);
+    TezTaskID destTaskID = mock(TezTaskID.class);
+    TezVertexID destVertexID = mock(TezVertexID.class);
+    when(mockDestId1.getTaskID()).thenReturn(destTaskID);
+    when(mockDestId1.getVertexID()).thenReturn(destVertexID);
+    when(destTaskID.getVertexID()).thenReturn(destVertexID);
+    Vertex destVertex = mock(VertexImpl.class);
+    when(destVertex.getRunningTasks()).thenReturn(5);
+    when(mockDAG.getVertex(destVertexID)).thenReturn(destVertex);
+    when(appCtx.getCurrentDAG()).thenReturn(mockDAG);
+    TezEvent tzEvent = new TezEvent(mockReEvent, mockMeta);
+    taImpl.handle(new TaskAttemptEventOutputFailed(taskAttemptID, tzEvent, 11));
+
+    // failure threshold is met due to running tasks. state is FAILED
+    assertEquals("Task attempt is not in FAILED state", TaskAttemptState.FAILED, taImpl.getState());
+  }
+
   @SuppressWarnings("deprecation")
   @Test(timeout = 5000)
   public void testKilledInNew() throws ServicePluginException {
@@ -1884,7 +2159,7 @@ taListener, taskConf, new SystemClock(),
         mockHeartbeatHandler, appCtx, false,
         resource, createFakeContainerContext(), true);
     Assert.assertEquals(TaskAttemptStateInternal.NEW, taImpl.getInternalState());
-    taImpl.handle(new TaskAttemptEventKillRequest(taImpl.getID(), "kill it",
+    taImpl.handle(new TaskAttemptEventKillRequest(taImpl.getTaskAttemptID(), "kill it",
         TaskAttemptTerminationCause.TERMINATED_BY_CLIENT));
     Assert.assertEquals(TaskAttemptStateInternal.KILLED, taImpl.getInternalState());
@@ -1892,6 +2167,181 @@ taListener, taskConf, new SystemClock(),
     Assert.assertEquals(1, taImpl.taskAttemptFinishedEventLogged);
   }
 
+  @Test
+  public void testMapTaskIsBlamedImmediatelyOnLocalFetchFailure() throws ServicePluginException {
+    // local fetch failure or disk read error at source -> turn source attempt to FAIL_IN_PROGRESS
+    testMapTaskFailingForFetchFailureType(true, true, TaskAttemptStateInternal.FAIL_IN_PROGRESS);
+    testMapTaskFailingForFetchFailureType(true, false, TaskAttemptStateInternal.FAIL_IN_PROGRESS);
+    testMapTaskFailingForFetchFailureType(false, true, TaskAttemptStateInternal.FAIL_IN_PROGRESS);
+
+    // remote fetch failure -> won't change current state
+    testMapTaskFailingForFetchFailureType(false, false, TaskAttemptStateInternal.NEW);
+  }
+
+  private void testMapTaskFailingForFetchFailureType(boolean isLocalFetch,
+      boolean isDiskErrorAtSource, TaskAttemptStateInternal expectedState) {
+    EventHandler eventHandler = mock(EventHandler.class);
+    TezTaskID taskID =
+        TezTaskID.getInstance(TezVertexID.getInstance(TezDAGID.getInstance("1", 1, 1), 1), 1);
+    TaskAttemptImpl sourceAttempt = new MockTaskAttemptImpl(taskID, 1, eventHandler, null,
+        new Configuration(), SystemClock.getInstance(), mock(TaskHeartbeatHandler.class), appCtx,
+        false, null, null, false).setNodeId(NodeId.newInstance("somehost", 0));
+
+    // the original read error event, sent by reducer task
+    InputReadErrorEvent inputReadErrorEvent =
+        InputReadErrorEvent.create("", 0, 1, 1, isLocalFetch, isDiskErrorAtSource, null);
+    TezTaskAttemptID destTaskAttemptId = mock(TezTaskAttemptID.class);
+    when(destTaskAttemptId.getTaskID()).thenReturn(mock(TezTaskID.class));
+    when(destTaskAttemptId.getVertexID()).thenReturn(mock(TezVertexID.class));
+    when(appCtx.getCurrentDAG()).thenReturn(mock(DAG.class));
+    when(appCtx.getCurrentDAG().getVertex(any(TezVertexID.class)))
+        .thenReturn(mock(Vertex.class));
+    when(mock(Vertex.class).getRunningTasks()).thenReturn(100);
+
+    EventMetaData mockMeta = mock(EventMetaData.class);
+    when(mockMeta.getTaskAttemptID()).thenReturn(destTaskAttemptId);
+    TezEvent tezEvent = new TezEvent(inputReadErrorEvent, mockMeta);
+
+    // the event is propagated to map task's event handler
+    TaskAttemptEventOutputFailed outputFailedEvent =
+        new TaskAttemptEventOutputFailed(sourceAttempt.getTaskAttemptID(), tezEvent, 11);
+
+    Assert.assertEquals(TaskAttemptStateInternal.NEW, sourceAttempt.getInternalState());
+    TaskAttemptStateInternal resultState = new TaskAttemptImpl.OutputReportedFailedTransition()
+        .transition(sourceAttempt, outputFailedEvent);
+    Assert.assertEquals(expectedState, resultState);
+  }
+
+  @Test
+  public void testMapTaskIsBlamedByDownstreamAttemptsFromDifferentHosts() {
+    EventHandler eventHandler = mock(EventHandler.class);
+    TezTaskID
taskID = TezTaskID.getInstance(TezVertexID.getInstance(TezDAGID.getInstance("1", 1, 1), 1), 1); + TaskAttemptImpl sourceAttempt = new MockTaskAttemptImpl(taskID, 1, eventHandler, null, new Configuration(), + SystemClock.getInstance(), mock(TaskHeartbeatHandler.class), appCtx, false, null, null, false) + .setNodeId(NodeId.newInstance("somehost", 0)); + + // input read error events from 2 different hosts + InputReadErrorEvent inputReadErrorEvent1 = + InputReadErrorEvent.create("", 0, 1, 1, false, false, "downstream_host_1"); + InputReadErrorEvent inputReadErrorEvent2 = + InputReadErrorEvent.create("", 1, 1, 1, false, false, "downstream_host_2"); + + TezTaskAttemptID destTaskAttemptId = mock(TezTaskAttemptID.class, RETURNS_DEEP_STUBS); + when(destTaskAttemptId.getTaskID()).thenReturn(mock(TezTaskID.class)); + when(destTaskAttemptId.getTaskID().getVertexID()).thenReturn(mock(TezVertexID.class)); + when(appCtx.getCurrentDAG()).thenReturn(mock(DAG.class)); + when(appCtx.getCurrentDAG().getVertex(any(TezVertexID.class))) + .thenReturn(mock(Vertex.class)); + when(mock(Vertex.class).getRunningTasks()).thenReturn(100); + + EventMetaData mockMeta = mock(EventMetaData.class); + when(mockMeta.getTaskAttemptID()).thenReturn(destTaskAttemptId); + + // mapper task succeeded earlier + sourceAttempt.handle(new TaskAttemptEvent(sourceAttempt.getTaskAttemptID(), TaskAttemptEventType.TA_DONE)); + Assert.assertEquals(TaskAttemptStateInternal.SUCCEEDED, sourceAttempt.getInternalState()); + + // the event is propagated to map task's event handler + TezEvent tezEvent = new TezEvent(inputReadErrorEvent1, mockMeta); + TaskAttemptEventOutputFailed outputFailedEvent = + new TaskAttemptEventOutputFailed(sourceAttempt.getTaskAttemptID(), tezEvent, 11); + TaskAttemptStateInternal resultState = + new TaskAttemptImpl.OutputReportedFailedTransition().transition(sourceAttempt, outputFailedEvent); + // SUCCEEDED, as we haven't reached the host limit fraction + // active nodes: 8, failed hosts: 1, fraction 0.125 (< 0.2) + Assert.assertEquals(TaskAttemptStateInternal.SUCCEEDED, resultState); + + // the second event is propagated to map task's event handler + TezEvent tezEvent2 = new TezEvent(inputReadErrorEvent2, mockMeta); + TaskAttemptEventOutputFailed outputFailedEvent2 = + new TaskAttemptEventOutputFailed(sourceAttempt.getTaskAttemptID(), tezEvent2, 11); + TaskAttemptStateInternal resultState2 = + new TaskAttemptImpl.OutputReportedFailedTransition().transition(sourceAttempt, outputFailedEvent2); + + // now it's marked as FAILED + // active nodes: 8, failed hosts: 2, fraction 0.25 (> 0.2) + Assert.assertEquals(TaskAttemptStateInternal.FAILED, resultState2); + } + + @Test + public void testDAGCounterUpdateEvent(){ + TaskAttemptImpl taImpl = getMockTaskAttempt(); + + DAGEventCounterUpdate counterUpdateSucceeded = TaskAttemptImpl.createDAGCounterUpdateEventTAFinished(taImpl, + TaskAttemptState.SUCCEEDED); + List succeededUpdates = counterUpdateSucceeded.getCounterUpdates(); + // SUCCEEDED task related counters are updated (+ WALL_CLOCK_MILLIS) + assertCounterIncrementalUpdate(succeededUpdates, DAGCounter.NUM_SUCCEEDED_TASKS, 1); + assertCounterIncrementalUpdate(succeededUpdates, DAGCounter.DURATION_SUCCEEDED_TASKS_MILLIS, 1000); + assertCounterIncrementalUpdate(succeededUpdates, DAGCounter.WALL_CLOCK_MILLIS, 1000); + // other counters are not updated (no FAILED, no KILLED) + assertCounterIncrementalUpdateNotFound(succeededUpdates, DAGCounter.NUM_FAILED_TASKS); + assertCounterIncrementalUpdateNotFound(succeededUpdates, 
DAGCounter.NUM_KILLED_TASKS);
+    assertCounterIncrementalUpdateNotFound(succeededUpdates, DAGCounter.DURATION_FAILED_TASKS_MILLIS);
+    assertCounterIncrementalUpdateNotFound(succeededUpdates, DAGCounter.DURATION_KILLED_TASKS_MILLIS);
+
+    DAGEventCounterUpdate counterUpdateFailed = TaskAttemptImpl.createDAGCounterUpdateEventTAFinished(taImpl,
+        TaskAttemptState.FAILED);
+    List<DAGEventCounterUpdate.CounterIncrementalUpdate> failedUpdates = counterUpdateFailed.getCounterUpdates();
+    // FAILED task related counters are updated (+ WALL_CLOCK_MILLIS)
+    assertCounterIncrementalUpdate(failedUpdates, DAGCounter.NUM_FAILED_TASKS, 1);
+    assertCounterIncrementalUpdate(failedUpdates, DAGCounter.DURATION_FAILED_TASKS_MILLIS, 1000);
+    assertCounterIncrementalUpdate(failedUpdates, DAGCounter.WALL_CLOCK_MILLIS, 1000);
+    // other counters are not updated (no SUCCEEDED, no KILLED)
+    assertCounterIncrementalUpdateNotFound(failedUpdates, DAGCounter.NUM_SUCCEEDED_TASKS);
+    assertCounterIncrementalUpdateNotFound(failedUpdates, DAGCounter.NUM_KILLED_TASKS);
+    assertCounterIncrementalUpdateNotFound(failedUpdates, DAGCounter.DURATION_KILLED_TASKS_MILLIS);
+    assertCounterIncrementalUpdateNotFound(failedUpdates, DAGCounter.DURATION_SUCCEEDED_TASKS_MILLIS);
+
+    DAGEventCounterUpdate counterUpdateKilled = TaskAttemptImpl.createDAGCounterUpdateEventTAFinished(taImpl,
+        TaskAttemptState.KILLED);
+    List<DAGEventCounterUpdate.CounterIncrementalUpdate> killedUpdates = counterUpdateKilled.getCounterUpdates();
+    // KILLED task related counters are updated (+ WALL_CLOCK_MILLIS)
+    assertCounterIncrementalUpdate(killedUpdates, DAGCounter.NUM_KILLED_TASKS, 1);
+    assertCounterIncrementalUpdate(killedUpdates, DAGCounter.DURATION_KILLED_TASKS_MILLIS, 1000);
+    assertCounterIncrementalUpdate(killedUpdates, DAGCounter.WALL_CLOCK_MILLIS, 1000);
+    // other counters are not updated (no SUCCEEDED, no FAILED)
+    assertCounterIncrementalUpdateNotFound(killedUpdates, DAGCounter.NUM_SUCCEEDED_TASKS);
+    assertCounterIncrementalUpdateNotFound(killedUpdates, DAGCounter.NUM_FAILED_TASKS);
+    assertCounterIncrementalUpdateNotFound(killedUpdates, DAGCounter.DURATION_FAILED_TASKS_MILLIS);
+    assertCounterIncrementalUpdateNotFound(killedUpdates, DAGCounter.DURATION_SUCCEEDED_TASKS_MILLIS);
+  }
+
+  private TaskAttemptImpl getMockTaskAttempt() {
+    ApplicationId appId = ApplicationId.newInstance(1, 2);
+    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(
+        appId, 0);
+    TezDAGID dagID = TezDAGID.getInstance(appId, 1);
+    TezVertexID vertexID = TezVertexID.getInstance(dagID, 1);
+    TezTaskID taskID = TezTaskID.getInstance(vertexID, 1);
+
+    return new MockTaskAttemptImpl(taskID, 1, mock(EventHandler.class),
+        mock(TaskCommunicatorManagerInterface.class), new Configuration(), new MonotonicClock(),
+        mock(TaskHeartbeatHandler.class), mock(AppContext.class), false,
+        mock(Resource.class), mock(ContainerContext.class), false);
+  }
+
+  private void assertCounterIncrementalUpdate(List<DAGEventCounterUpdate.CounterIncrementalUpdate> counterUpdates,
+      DAGCounter counter, int expectedValue) {
+    for (DAGEventCounterUpdate.CounterIncrementalUpdate update : counterUpdates) {
+      if (update.getCounterKey().equals(counter) && update.getIncrementValue() == expectedValue) {
+        return;
+      }
+    }
+    Assert.fail(
+        String.format("Haven't found counter update %s=%d, instead seen: %s", counter, expectedValue, counterUpdates));
+  }
+
+  private void assertCounterIncrementalUpdateNotFound(
+      List<DAGEventCounterUpdate.CounterIncrementalUpdate> counterUpdates, DAGCounter counter) {
+    for (DAGEventCounterUpdate.CounterIncrementalUpdate update : counterUpdates) {
+      if (update.getCounterKey().equals(counter)) {
+        Assert.fail(
+            String.format("Found counter update 
%s=%d, which is not expected", counter, update.getIncrementValue())); + } + } + } + private Event verifyEventType(List events, Class eventClass, int expectedOccurences) { int count = 0; @@ -1935,12 +2385,17 @@ public MockTaskAttemptImpl(TezTaskID taskId, int attemptNumber, super(TezBuilderUtils.newTaskAttemptId(taskId, attemptNumber), eventHandler, tal, conf, clock, taskHeartbeatHandler, appContext, - isRescheduled, resource, containerContext, leafVertex, mockVertex, + isRescheduled, resource, containerContext, leafVertex, mockTask, locationHint, null, null); } - + boolean inputFailedReported = false; - + + public MockTaskAttemptImpl setNodeId(NodeId nodeId) { + this.container = new TezContainer(Container.newInstance(null, nodeId, null, null, null, null)); + return this; + } + @Override protected Vertex getVertex() { return mockVertex; @@ -1970,6 +2425,11 @@ protected void logJobHistoryAttemptUnsuccesfulCompletion( protected void sendInputFailedToConsumers() { inputFailedReported = true; } + + @Override + public long getDurationNs(){ + return 1000000000L; // 1000000000ns = 1000ms + } } private static ContainerContext createFakeContainerContext() { diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java index e03e2829d9..3bf59977e5 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestTaskImpl.java @@ -40,6 +40,7 @@ import org.apache.tez.dag.app.dag.event.TaskAttemptEventStartedRemotely; import org.apache.tez.dag.app.dag.event.TaskAttemptEventSubmitted; import org.apache.tez.dag.app.dag.event.DAGEventType; +import org.apache.tez.dag.app.dag.event.TaskAttemptEventTerminationCauseEvent; import org.apache.tez.dag.app.dag.event.TaskEvent; import org.apache.tez.dag.app.dag.event.TaskEventTAFailed; import org.apache.tez.dag.app.dag.event.TaskEventTAKilled; @@ -104,7 +105,6 @@ import org.junit.Test; public class TestTaskImpl { - private static final Logger LOG = LoggerFactory.getLogger(TestTaskImpl.class); private int taskCounter = 0; @@ -185,7 +185,7 @@ public void setup() { Vertex vertex = mock(Vertex.class); doReturn(new VertexImpl.VertexConfigImpl(conf)).when(vertex).getVertexConfig(); eventHandler = new TestEventHandler(); - + mockTask = new MockTaskImpl(vertexId, partition, eventHandler, conf, taskCommunicatorManagerInterface, clock, taskHeartbeatHandler, appContext, leafVertex, @@ -286,10 +286,15 @@ private void updateAttemptState(MockTaskAttemptImpl attempt, attempt.setState(s); } - private void killRunningTaskAttempt(TezTaskAttemptID attemptId) { + private void killRunningTaskAttempt(TezTaskAttemptID attemptId, TaskState stateToVerify) { + killRunningTaskAttempt(attemptId, stateToVerify, 1); + } + + private void killRunningTaskAttempt(TezTaskAttemptID attemptId, TaskState stateToVerify, + int killedCountToVerify) { mockTask.handle(createTaskTAKilledEvent(attemptId)); - assertTaskRunningState(); - verify(mockTask.getVertex(), times(1)).incrementKilledTaskAttemptCount(); + assertTaskState(stateToVerify); + verify(mockTask.getVertex(), times(killedCountToVerify)).incrementKilledTaskAttemptCount(); } private void failRunningTaskAttempt(TezTaskAttemptID attemptId) { @@ -310,21 +315,25 @@ private void failRunningTaskAttempt(TezTaskAttemptID attemptId, boolean verifySt * {@link TaskState#NEW} */ private void assertTaskNewState() { - assertEquals(TaskState.NEW, mockTask.getState()); + 
assertTaskState(TaskState.NEW); } /** * {@link TaskState#SCHEDULED} */ private void assertTaskScheduledState() { - assertEquals(TaskState.SCHEDULED, mockTask.getState()); + assertTaskState(TaskState.SCHEDULED); } /** * {@link TaskState#RUNNING} */ private void assertTaskRunningState() { - assertEquals(TaskState.RUNNING, mockTask.getState()); + assertTaskState(TaskState.RUNNING); + } + + private void assertTaskState(TaskState state) { + assertEquals(state, mockTask.getState()); } /** @@ -377,32 +386,56 @@ public void testKillRunningTask() { LOG.info("--- START: testKillRunningTask ---"); TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); killTask(taskId); - mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getTaskAttemptID())); assertEquals(TaskStateInternal.KILLED, mockTask.getInternalState()); verifyOutgoingEvents(eventHandler.events, VertexEventType.V_TASK_COMPLETED); } @Test(timeout = 5000) - public void testTooManyFailedAtetmpts() { + public void testTooManyFailedAttempts() { LOG.info("--- START: testTooManyFailedAttempts ---"); TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId, TaskState.SCHEDULED); - launchTaskAttempt(mockTask.getLastAttempt().getID()); - failRunningTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); + failRunningTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); scheduleTaskAttempt(taskId, TaskState.RUNNING); - launchTaskAttempt(mockTask.getLastAttempt().getID()); - failRunningTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); + failRunningTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); scheduleTaskAttempt(taskId, TaskState.RUNNING); - launchTaskAttempt(mockTask.getLastAttempt().getID()); - failRunningTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); + failRunningTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); scheduleTaskAttempt(taskId, TaskState.RUNNING); - launchTaskAttempt(mockTask.getLastAttempt().getID()); - failRunningTaskAttempt(mockTask.getLastAttempt().getID(), false); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); + failRunningTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID(), false); + + assertEquals(TaskStateInternal.FAILED, mockTask.getInternalState()); + verifyOutgoingEvents(eventHandler.events, VertexEventType.V_TASK_COMPLETED); + } + + @Test(timeout = 5000) + public void testTooManyAttempts() { + LOG.info("--- START: testTooManyAttempts ---"); + + conf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_ATTEMPTS, 3); + Vertex vertex = mock(Vertex.class); + doReturn(new VertexImpl.VertexConfigImpl(conf)).when(vertex).getVertexConfig(); + mockTask = new MockTaskImpl(vertexId, partition, + eventHandler, conf, taskCommunicatorManagerInterface, clock, + taskHeartbeatHandler, appContext, leafVertex, + taskResource, containerContext, vertex); + + TezTaskID taskId = getNewTaskID(); + scheduleTaskAttempt(taskId, TaskState.SCHEDULED); // attempt_0 + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); + killRunningTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID(), TaskState.RUNNING, 1); // attempt_1 + + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); + 
killRunningTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID(), TaskState.FAILED, 2); // attempt_2 -> reached 3 assertEquals(TaskStateInternal.FAILED, mockTask.getInternalState()); verifyOutgoingEvents(eventHandler.events, VertexEventType.V_TASK_COMPLETED); @@ -413,9 +446,9 @@ public void testFailedAttemptWithFatalError() { LOG.info("--- START: testFailedAttemptWithFatalError ---"); TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId, TaskState.SCHEDULED); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); mockTask.handle( - createTaskTAFailedEvent(mockTask.getLastAttempt().getID(), TaskFailureType.FATAL, null)); + createTaskTAFailedEvent(mockTask.getLastAttempt().getTaskAttemptID(), TaskFailureType.FATAL, null)); assertEquals(TaskStateInternal.FAILED, mockTask.getInternalState()); assertEquals(1, mockTask.failedAttempts); @@ -427,11 +460,11 @@ public void testKillRunningTaskPreviousKilledAttempts() { LOG.info("--- START: testKillRunningTaskPreviousKilledAttempts ---"); TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - launchTaskAttempt(mockTask.getLastAttempt().getID()); - killRunningTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); + killRunningTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID(), TaskState.RUNNING); assertEquals(TaskStateInternal.RUNNING, mockTask.getInternalState()); killTask(taskId); - mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getTaskAttemptID())); assertEquals(TaskStateInternal.KILLED, mockTask.getInternalState()); verifyOutgoingEvents(eventHandler.events, VertexEventType.V_TASK_COMPLETED); @@ -445,9 +478,9 @@ public void testKillRunningTaskButAttemptSucceeds() { LOG.info("--- START: testKillRunningTaskButAttemptSucceeds ---"); TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); killTask(taskId); - mockTask.handle(createTaskTASucceededEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTASucceededEvent(mockTask.getLastAttempt().getTaskAttemptID())); assertEquals(TaskStateInternal.KILLED, mockTask.getInternalState()); } @@ -459,9 +492,9 @@ public void testKillRunningTaskButAttemptFails() { LOG.info("--- START: testKillRunningTaskButAttemptFails ---"); TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); killTask(taskId); - mockTask.handle(createTaskTAFailedEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTAFailedEvent(mockTask.getLastAttempt().getTaskAttemptID())); assertEquals(TaskStateInternal.KILLED, mockTask.getInternalState()); } @@ -474,8 +507,8 @@ public void testKillScheduledTaskAttempt() { LOG.info("--- START: testKillScheduledTaskAttempt ---"); TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - TezTaskAttemptID lastTAId = mockTask.getLastAttempt().getID(); - killScheduledTaskAttempt(mockTask.getLastAttempt().getID()); + TezTaskAttemptID lastTAId = mockTask.getLastAttempt().getTaskAttemptID(); + killScheduledTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); // last killed attempt should be causal TA of next attempt Assert.assertEquals(lastTAId, 
mockTask.getLastAttempt().getSchedulingCausalTA()); } @@ -489,7 +522,7 @@ public void testLaunchTaskAttempt() { LOG.info("--- START: testLaunchTaskAttempt ---"); TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); } @Test(timeout = 5000) @@ -501,13 +534,30 @@ public void testKillRunningTaskAttempt() { LOG.info("--- START: testKillRunningTaskAttempt ---"); TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - TezTaskAttemptID lastTAId = mockTask.getLastAttempt().getID(); - launchTaskAttempt(mockTask.getLastAttempt().getID()); - killRunningTaskAttempt(mockTask.getLastAttempt().getID()); + TezTaskAttemptID lastTAId = mockTask.getLastAttempt().getTaskAttemptID(); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); + killRunningTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID(), TaskState.RUNNING); // last killed attempt should be causal TA of next attempt Assert.assertEquals(lastTAId, mockTask.getLastAttempt().getSchedulingCausalTA()); } + @Test(timeout = 5000) + /** + * Kill running attempt + * {@link TaskState#RUNNING}->{@link TaskState#RUNNING} + */ + public void testKillTaskAttemptServiceBusy() { + LOG.info("--- START: testKillTaskAttemptServiceBusy ---"); + TezTaskID taskId = getNewTaskID(); + scheduleTaskAttempt(taskId); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); + mockTask.handle(createTaskTAKilledEvent( + mockTask.getLastAttempt().getTaskAttemptID(), new ServiceBusyEvent())); + assertTaskRunningState(); + verify(mockTask.getVertex(), times(0)).incrementKilledTaskAttemptCount(); + verify(mockTask.getVertex(), times(1)).incrementRejectedTaskAttemptCount(); + } + /** * {@link TaskState#KILLED}->{@link TaskState#KILLED} */ @@ -516,15 +566,15 @@ public void testKilledAttemptAtTaskKilled() { LOG.info("--- START: testKilledAttemptAtTaskKilled ---"); TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); killTask(taskId); - mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getTaskAttemptID())); assertEquals(TaskStateInternal.KILLED, mockTask.getInternalState()); // Send duplicate kill for same attempt // This will not happen in practice but this is to simulate handling // of killed attempts in killed state. 
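    // (The duplicate TA_KILLED must be ignored rather than trigger an invalid
    // transition; the assertion below only checks that the task remains KILLED.)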
- mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getTaskAttemptID())); assertEquals(TaskStateInternal.KILLED, mockTask.getInternalState()); } @@ -538,12 +588,12 @@ public void testKilledAttemptAtTaskFailed() { TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); for (int i = 0; i < mockTask.maxFailedAttempts; ++i) { - mockTask.handle(createTaskTAFailedEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTAFailedEvent(mockTask.getLastAttempt().getTaskAttemptID())); } assertEquals(TaskStateInternal.FAILED, mockTask.getInternalState()); // Send kill for an attempt - mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getTaskAttemptID())); assertEquals(TaskStateInternal.FAILED, mockTask.getInternalState()); } @@ -559,7 +609,7 @@ public void testFetchedEventsModifyUnderlyingList() { scheduleTaskAttempt(taskId); sendTezEventsToTask(taskId, 2); TezTaskAttemptID attemptID = mockTask.getAttemptList().iterator().next() - .getID(); + .getTaskAttemptID(); fetchedList = mockTask.getTaskAttemptTezEvents(attemptID, 0, 100); assertEquals(2, fetchedList.size()); @@ -581,7 +631,7 @@ public void testTaskProgress() { scheduleTaskAttempt(taskId); float progress = 0f; assert (mockTask.getProgress() == progress); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); // update attempt1 progress = 50f; @@ -599,13 +649,13 @@ public void testTaskProgress() { // kill first attempt // should trigger a new attempt // as no successful attempts - failRunningTaskAttempt(mockTask.getLastAttempt().getID()); + failRunningTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); assert (mockTask.getAttemptList().size() == 2); assertEquals(1, mockTask.failedAttempts); verify(mockTask.getVertex(), times(1)).incrementFailedTaskAttemptCount(); assert (mockTask.getProgress() == 0f); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); progress = 50f; updateAttemptProgress(mockTask.getLastAttempt(), progress); assert (mockTask.getProgress() == progress); @@ -615,17 +665,17 @@ public void testTaskProgress() { public void testFailureDuringTaskAttemptCommit() { TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); updateAttemptState(mockTask.getLastAttempt(), TaskAttemptState.RUNNING); assertTrue("First attempt should commit", - mockTask.canCommit(mockTask.getLastAttempt().getID())); + mockTask.canCommit(mockTask.getLastAttempt().getTaskAttemptID())); // During the task attempt commit there is an exception which causes // the attempt to fail - TezTaskAttemptID lastTAId = mockTask.getLastAttempt().getID(); + TezTaskAttemptID lastTAId = mockTask.getLastAttempt().getTaskAttemptID(); updateAttemptState(mockTask.getLastAttempt(), TaskAttemptState.FAILED); assertEquals(1, mockTask.getAttemptList().size()); - failRunningTaskAttempt(mockTask.getLastAttempt().getID()); + failRunningTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); assertEquals(2, mockTask.getAttemptList().size()); assertEquals(1, mockTask.failedAttempts); @@ -633,13 +683,13 @@ public void testFailureDuringTaskAttemptCommit() { Assert.assertEquals(lastTAId, 
mockTask.getLastAttempt().getSchedulingCausalTA()); assertFalse("First attempt should not commit", - mockTask.canCommit(mockTask.getAttemptList().get(0).getID())); + mockTask.canCommit(mockTask.getAttemptList().get(0).getTaskAttemptID())); updateAttemptState(mockTask.getLastAttempt(), TaskAttemptState.RUNNING); assertTrue("Second attempt should commit", - mockTask.canCommit(mockTask.getLastAttempt().getID())); + mockTask.canCommit(mockTask.getLastAttempt().getTaskAttemptID())); updateAttemptState(mockTask.getLastAttempt(), TaskAttemptState.SUCCEEDED); - mockTask.handle(createTaskTASucceededEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTASucceededEvent(mockTask.getLastAttempt().getTaskAttemptID())); assertTaskSucceededState(); } @@ -653,14 +703,14 @@ public void testEventBacklogDuringTaskAttemptCommit() { // simulate // task in scheduled state due to event backlog - real task done and calling canCommit assertFalse("Commit should return false to make running task wait", - mockTask.canCommit(mockTask.getLastAttempt().getID())); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + mockTask.canCommit(mockTask.getLastAttempt().getTaskAttemptID())); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); updateAttemptState(mockTask.getLastAttempt(), TaskAttemptState.RUNNING); assertTrue("Task state in AM is running now. Can commit.", - mockTask.canCommit(mockTask.getLastAttempt().getID())); + mockTask.canCommit(mockTask.getLastAttempt().getTaskAttemptID())); updateAttemptState(mockTask.getLastAttempt(), TaskAttemptState.SUCCEEDED); - mockTask.handle(createTaskTASucceededEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTASucceededEvent(mockTask.getLastAttempt().getTaskAttemptID())); assertTaskSucceededState(); } @@ -670,13 +720,13 @@ public void testEventBacklogDuringTaskAttemptCommit() { public void testChangeCommitTaskAttempt() { TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); updateAttemptState(mockTask.getLastAttempt(), TaskAttemptState.RUNNING); - TezTaskAttemptID lastTAId = mockTask.getLastAttempt().getID(); + TezTaskAttemptID lastTAId = mockTask.getLastAttempt().getTaskAttemptID(); // Add a speculative task attempt that succeeds - mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID())); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); updateAttemptState(mockTask.getLastAttempt(), TaskAttemptState.RUNNING); assertEquals(2, mockTask.getAttemptList().size()); @@ -685,24 +735,24 @@ public void testChangeCommitTaskAttempt() { Assert.assertEquals(lastTAId, mockTask.getLastAttempt().getSchedulingCausalTA()); assertTrue("Second attempt should commit", - mockTask.canCommit(mockTask.getAttemptList().get(1).getID())); + mockTask.canCommit(mockTask.getAttemptList().get(1).getTaskAttemptID())); assertFalse("First attempt should not commit", - mockTask.canCommit(mockTask.getAttemptList().get(0).getID())); + mockTask.canCommit(mockTask.getAttemptList().get(0).getTaskAttemptID())); // During the task attempt commit there is an exception which causes // the second attempt to fail updateAttemptState(mockTask.getLastAttempt(), TaskAttemptState.FAILED); - failRunningTaskAttempt(mockTask.getLastAttempt().getID()); + 
failRunningTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); assertEquals(2, mockTask.getAttemptList().size()); assertFalse("Second attempt should not commit", - mockTask.canCommit(mockTask.getAttemptList().get(1).getID())); + mockTask.canCommit(mockTask.getAttemptList().get(1).getTaskAttemptID())); assertTrue("First attempt should commit", - mockTask.canCommit(mockTask.getAttemptList().get(0).getID())); + mockTask.canCommit(mockTask.getAttemptList().get(0).getTaskAttemptID())); updateAttemptState(mockTask.getAttemptList().get(0), TaskAttemptState.SUCCEEDED); - mockTask.handle(createTaskTASucceededEvent(mockTask.getAttemptList().get(0).getID())); + mockTask.handle(createTaskTASucceededEvent(mockTask.getAttemptList().get(0).getTaskAttemptID())); assertTaskSucceededState(); } @@ -712,15 +762,15 @@ public void testChangeCommitTaskAttempt() { public void testTaskSucceedAndRetroActiveFailure() { TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); updateAttemptState(mockTask.getLastAttempt(), TaskAttemptState.RUNNING); - mockTask.handle(createTaskTASucceededEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTASucceededEvent(mockTask.getLastAttempt().getTaskAttemptID())); // The task should now have succeeded assertTaskSucceededState(); - verify(mockTask.stateChangeNotifier).taskSucceeded(any(String.class), eq(taskId), - eq(mockTask.getLastAttempt().getID().getId())); + verify(mockTask.stateChangeNotifier).taskSucceeded(any(), eq(taskId), + eq(mockTask.getLastAttempt().getTaskAttemptID().getId())); ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(DAGHistoryEvent.class); verify(mockHistoryHandler).handle(argumentCaptor.capture()); @@ -739,7 +789,7 @@ public void testTaskSucceedAndRetroActiveFailure() { TaskAttemptEventOutputFailed outputFailedEvent = new TaskAttemptEventOutputFailed(mockDestId, mockTezEvent, 1); mockTask.handle( - createTaskTAFailedEvent(mockTask.getLastAttempt().getID(), TaskFailureType.NON_FATAL, + createTaskTAFailedEvent(mockTask.getLastAttempt().getTaskAttemptID(), TaskFailureType.NON_FATAL, outputFailedEvent)); // The task should still be in the scheduled state @@ -761,19 +811,19 @@ public void testTaskSucceedAndRetroActiveFailure() { public void testTaskSucceedAndRetroActiveKilled() { TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); - launchTaskAttempt(mockTask.getLastAttempt().getID()); + launchTaskAttempt(mockTask.getLastAttempt().getTaskAttemptID()); updateAttemptState(mockTask.getLastAttempt(), TaskAttemptState.RUNNING); - mockTask.handle(createTaskTASucceededEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTASucceededEvent(mockTask.getLastAttempt().getTaskAttemptID())); // The task should now have succeeded assertTaskSucceededState(); - verify(mockTask.stateChangeNotifier).taskSucceeded(any(String.class), eq(taskId), - eq(mockTask.getLastAttempt().getID().getId())); + verify(mockTask.stateChangeNotifier).taskSucceeded(any(), eq(taskId), + eq(mockTask.getLastAttempt().getTaskAttemptID().getId())); eventHandler.events.clear(); // Now kill the attempt after it has succeeded - mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTAKilledEvent(mockTask.getLastAttempt().getTaskAttemptID())); // The task should still be in the scheduled state assertTaskScheduledState(); @@ -812,25 +862,25 @@ public void 
testFailedThenSpeculativeFailed() { TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); MockTaskAttemptImpl firstAttempt = mockTask.getLastAttempt(); - launchTaskAttempt(firstAttempt.getID()); + launchTaskAttempt(firstAttempt.getTaskAttemptID()); updateAttemptState(firstAttempt, TaskAttemptState.RUNNING); // Add a speculative task attempt - mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); MockTaskAttemptImpl specAttempt = mockTask.getLastAttempt(); - launchTaskAttempt(specAttempt.getID()); + launchTaskAttempt(specAttempt.getTaskAttemptID()); updateAttemptState(specAttempt, TaskAttemptState.RUNNING); assertEquals(2, mockTask.getAttemptList().size()); // Fail the first attempt updateAttemptState(firstAttempt, TaskAttemptState.FAILED); - mockTask.handle(createTaskTAFailedEvent(firstAttempt.getID())); + mockTask.handle(createTaskTAFailedEvent(firstAttempt.getTaskAttemptID())); assertEquals(TaskState.FAILED, mockTask.getState()); assertEquals(2, mockTask.getAttemptList().size()); // Now fail the speculative attempt updateAttemptState(specAttempt, TaskAttemptState.FAILED); - mockTask.handle(createTaskTAFailedEvent(specAttempt.getID())); + mockTask.handle(createTaskTAFailedEvent(specAttempt.getTaskAttemptID())); assertEquals(TaskState.FAILED, mockTask.getState()); assertEquals(2, mockTask.getAttemptList().size()); } @@ -847,63 +897,102 @@ public void testFailedThenSpeculativeSucceeded() { TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); MockTaskAttemptImpl firstAttempt = mockTask.getLastAttempt(); - launchTaskAttempt(firstAttempt.getID()); + launchTaskAttempt(firstAttempt.getTaskAttemptID()); updateAttemptState(firstAttempt, TaskAttemptState.RUNNING); // Add a speculative task attempt - mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); MockTaskAttemptImpl specAttempt = mockTask.getLastAttempt(); - launchTaskAttempt(specAttempt.getID()); + launchTaskAttempt(specAttempt.getTaskAttemptID()); updateAttemptState(specAttempt, TaskAttemptState.RUNNING); assertEquals(2, mockTask.getAttemptList().size()); // Fail the first attempt updateAttemptState(firstAttempt, TaskAttemptState.FAILED); - mockTask.handle(createTaskTAFailedEvent(firstAttempt.getID())); + mockTask.handle(createTaskTAFailedEvent(firstAttempt.getTaskAttemptID())); assertEquals(TaskState.FAILED, mockTask.getState()); assertEquals(2, mockTask.getAttemptList().size()); // Now succeed the speculative attempt updateAttemptState(specAttempt, TaskAttemptState.SUCCEEDED); - mockTask.handle(createTaskTASucceededEvent(specAttempt.getID())); + mockTask.handle(createTaskTASucceededEvent(specAttempt.getTaskAttemptID())); assertEquals(TaskState.FAILED, mockTask.getState()); assertEquals(2, mockTask.getAttemptList().size()); } + @Test + public void testKilledBeforeSpeculatedSucceeded() { + conf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1); + Vertex vertex = mock(Vertex.class); + doReturn(new VertexImpl.VertexConfigImpl(conf)).when(vertex).getVertexConfig(); + mockTask = new MockTaskImpl(vertexId, partition, + eventHandler, conf, taskCommunicatorManagerInterface, clock, + taskHeartbeatHandler, appContext, leafVertex, + taskResource, containerContext, vertex); + TezTaskID taskId = getNewTaskID(); + scheduleTaskAttempt(taskId); + MockTaskAttemptImpl firstAttempt = 
mockTask.getLastAttempt(); + launchTaskAttempt(firstAttempt.getTaskAttemptID()); + updateAttemptState(firstAttempt, TaskAttemptState.RUNNING); + + mockTask.handle(createTaskTAKilledEvent(firstAttempt.getTaskAttemptID())); + assertEquals(TaskStateInternal.RUNNING, mockTask.getInternalState()); + + // We need to manually override the current node id + // to induce NPE in the state machine transition + // simulating killed before speculated scenario + NodeId nodeId = mockNodeId; + mockNodeId = null; + + // Add a speculative task attempt + mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); + mockNodeId = nodeId; + MockTaskAttemptImpl specAttempt = mockTask.getLastAttempt(); + launchTaskAttempt(specAttempt.getTaskAttemptID()); + updateAttemptState(specAttempt, TaskAttemptState.RUNNING); + assertEquals(3, mockTask.getAttemptList().size()); + + // Now succeed the speculative attempt + updateAttemptState(specAttempt, TaskAttemptState.SUCCEEDED); + mockTask.handle(createTaskTASucceededEvent(specAttempt.getTaskAttemptID())); + assertEquals(TaskState.SUCCEEDED, mockTask.getState()); + assertEquals(3, mockTask.getAttemptList().size()); + } + @Test(timeout = 20000) public void testKilledAttemptUpdatesDAGScheduler() { TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); MockTaskAttemptImpl firstAttempt = mockTask.getLastAttempt(); - launchTaskAttempt(firstAttempt.getID()); + launchTaskAttempt(firstAttempt.getTaskAttemptID()); updateAttemptState(firstAttempt, TaskAttemptState.RUNNING); // Add a speculative task attempt - mockTask.handle(createTaskTAAddSpecAttempt(firstAttempt.getID())); + mockTask.handle(createTaskTAAddSpecAttempt(firstAttempt.getTaskAttemptID())); MockTaskAttemptImpl specAttempt = mockTask.getLastAttempt(); - launchTaskAttempt(specAttempt.getID()); + launchTaskAttempt(specAttempt.getTaskAttemptID()); updateAttemptState(specAttempt, TaskAttemptState.RUNNING); assertEquals(2, mockTask.getAttemptList().size()); // Have the first task succeed eventHandler.events.clear(); - mockTask.handle(createTaskTASucceededEvent(firstAttempt.getID())); + mockTask.handle(createTaskTASucceededEvent(firstAttempt.getTaskAttemptID())); verifyOutgoingEvents(eventHandler.events, DAGEventType.DAG_SCHEDULER_UPDATE, VertexEventType.V_TASK_COMPLETED, VertexEventType.V_TASK_ATTEMPT_COMPLETED); // The task should now have succeeded and sent kill to other attempt assertTaskSucceededState(); - verify(mockTask.stateChangeNotifier).taskSucceeded(any(String.class), eq(taskId), - eq(firstAttempt.getID().getId())); + verify(mockTask.stateChangeNotifier).taskSucceeded(any(), eq(taskId), + eq(firstAttempt.getTaskAttemptID().getId())); @SuppressWarnings("rawtypes") Event event = eventHandler.events.get(eventHandler.events.size()-1); assertEquals(TaskAttemptEventType.TA_KILL_REQUEST, event.getType()); - assertEquals(specAttempt.getID(), + assertEquals(specAttempt.getTaskAttemptID(), ((TaskAttemptEventKillRequest) event).getTaskAttemptID()); eventHandler.events.clear(); // Emulate the spec attempt being killed - mockTask.handle(createTaskTAKilledEvent(specAttempt.getID())); + mockTask.handle(createTaskTAKilledEvent(specAttempt.getTaskAttemptID())); assertTaskSucceededState(); verifyOutgoingEvents(eventHandler.events, DAGEventType.DAG_SCHEDULER_UPDATE, VertexEventType.V_TASK_ATTEMPT_COMPLETED); @@ -914,32 +1003,32 @@ public void testSpeculatedThenRetroactiveFailure() { TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); MockTaskAttemptImpl firstAttempt = 
mockTask.getLastAttempt(); - launchTaskAttempt(firstAttempt.getID()); + launchTaskAttempt(firstAttempt.getTaskAttemptID()); updateAttemptState(firstAttempt, TaskAttemptState.RUNNING); // Add a speculative task attempt - mockTask.handle(createTaskTAAddSpecAttempt(firstAttempt.getID())); + mockTask.handle(createTaskTAAddSpecAttempt(firstAttempt.getTaskAttemptID())); MockTaskAttemptImpl specAttempt = mockTask.getLastAttempt(); - launchTaskAttempt(specAttempt.getID()); + launchTaskAttempt(specAttempt.getTaskAttemptID()); updateAttemptState(specAttempt, TaskAttemptState.RUNNING); assertEquals(2, mockTask.getAttemptList().size()); // Have the first task succeed eventHandler.events.clear(); - mockTask.handle(createTaskTASucceededEvent(firstAttempt.getID())); + mockTask.handle(createTaskTASucceededEvent(firstAttempt.getTaskAttemptID())); // The task should now have succeeded and sent kill to other attempt assertTaskSucceededState(); - verify(mockTask.stateChangeNotifier).taskSucceeded(any(String.class), eq(taskId), - eq(firstAttempt.getID().getId())); + verify(mockTask.stateChangeNotifier).taskSucceeded(any(), eq(taskId), + eq(firstAttempt.getTaskAttemptID().getId())); @SuppressWarnings("rawtypes") Event event = eventHandler.events.get(eventHandler.events.size()-1); assertEquals(TaskAttemptEventType.TA_KILL_REQUEST, event.getType()); - assertEquals(specAttempt.getID(), + assertEquals(specAttempt.getTaskAttemptID(), ((TaskAttemptEventKillRequest) event).getTaskAttemptID()); // Emulate the spec attempt being killed - mockTask.handle(createTaskTAKilledEvent(specAttempt.getID())); + mockTask.handle(createTaskTAKilledEvent(specAttempt.getTaskAttemptID())); assertTaskSucceededState(); // Now fail the attempt after it has succeeded @@ -950,7 +1039,7 @@ public void testSpeculatedThenRetroactiveFailure() { TaskAttemptEventOutputFailed outputFailedEvent = new TaskAttemptEventOutputFailed(mockDestId, mockTezEvent, 1); eventHandler.events.clear(); - mockTask.handle(createTaskTAFailedEvent(firstAttempt.getID(), TaskFailureType.NON_FATAL, outputFailedEvent)); + mockTask.handle(createTaskTAFailedEvent(firstAttempt.getTaskAttemptID(), TaskFailureType.NON_FATAL, outputFailedEvent)); // The task should still be in the scheduled state assertTaskScheduledState(); @@ -965,16 +1054,50 @@ public void testSpeculatedThenRetroactiveFailure() { Assert.assertEquals(mockDestId, newAttempt.getSchedulingCausalTA()); } + @Test(timeout = 20000) + public void testIgnoreSpeculationOnSuccessfulOriginalAttempt() { + TezTaskID taskId = getNewTaskID(); + scheduleTaskAttempt(taskId); + MockTaskAttemptImpl firstAttempt = mockTask.getLastAttempt(); + launchTaskAttempt(firstAttempt.getTaskAttemptID()); + // Mock success of the first task attempt + updateAttemptState(firstAttempt, TaskAttemptState.SUCCEEDED); + firstAttempt.handle(new TaskAttemptEvent(firstAttempt.getTaskAttemptID(), TaskAttemptEventType.TA_DONE)); + + // Verify the speculation scheduling is ignored and no speculative attempt was added to the task + mockTask.handle(createTaskTAAddSpecAttempt(firstAttempt.getTaskAttemptID())); + MockTaskAttemptImpl specAttempt = mockTask.getLastAttempt(); + launchTaskAttempt(specAttempt.getTaskAttemptID()); + assertEquals(1, mockTask.getAttemptList().size()); + } + + @Test(timeout = 20000) + public void testIgnoreSpeculationAfterOriginalAttemptCommit() { + TezTaskID taskId = getNewTaskID(); + scheduleTaskAttempt(taskId); + MockTaskAttemptImpl firstAttempt = mockTask.getLastAttempt(); + launchTaskAttempt(firstAttempt.getTaskAttemptID()); + 
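// As in the other speculation tests here, the attempt is driven to RUNNING before the commit is mocked. +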
updateAttemptState(firstAttempt, TaskAttemptState.RUNNING); + // Mock commit of the first task attempt + mockTask.canCommit(firstAttempt.getTaskAttemptID()); + + // Verify the speculation scheduling is ignored and no speculative attempt was added to the task + mockTask.handle(createTaskTAAddSpecAttempt(firstAttempt.getTaskAttemptID())); + MockTaskAttemptImpl specAttempt = mockTask.getLastAttempt(); + launchTaskAttempt(specAttempt.getTaskAttemptID()); + assertEquals(1, mockTask.getAttemptList().size()); + } + @SuppressWarnings("rawtypes") @Test public void testSucceededAttemptStatusWithRetroActiveFailures() throws InterruptedException { TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); MockTaskAttemptImpl firstMockTaskAttempt = mockTask.getAttemptList().get(0); - launchTaskAttempt(firstMockTaskAttempt.getID()); - mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID())); + launchTaskAttempt(firstMockTaskAttempt.getTaskAttemptID()); + mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); MockTaskAttemptImpl secondMockTaskAttempt = mockTask.getAttemptList().get(1); - launchTaskAttempt(secondMockTaskAttempt.getID()); + launchTaskAttempt(secondMockTaskAttempt.getTaskAttemptID()); firstMockTaskAttempt.handle(new TaskAttemptEventSchedule( TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()), 10, 10)); @@ -996,8 +1119,8 @@ public void testSucceededAttemptStatusWithRetroActiveFailures() throws Interrupt new TaskAttemptEvent(TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()), TaskAttemptEventType.TA_DONE)); - mockTask.handle(new TaskEventTASucceeded(secondMockTaskAttempt.getID())); - mockTask.handle(new TaskEventTASucceeded(firstMockTaskAttempt.getID())); + mockTask.handle(new TaskEventTASucceeded(secondMockTaskAttempt.getTaskAttemptID())); + mockTask.handle(new TaskEventTASucceeded(firstMockTaskAttempt.getTaskAttemptID())); assertTrue("Attempts should have succeeded!", firstMockTaskAttempt.getInternalState() == TaskAttemptStateInternal.SUCCEEDED && secondMockTaskAttempt.getInternalState() == TaskAttemptStateInternal.SUCCEEDED); @@ -1025,10 +1148,10 @@ public void testFailedAttemptStatus() throws InterruptedException { TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); MockTaskAttemptImpl firstMockTaskAttempt = mockTask.getAttemptList().get(0); - launchTaskAttempt(firstMockTaskAttempt.getID()); - mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID())); + launchTaskAttempt(firstMockTaskAttempt.getTaskAttemptID()); + mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); MockTaskAttemptImpl secondMockTaskAttempt = mockTask.getAttemptList().get(1); - launchTaskAttempt(secondMockTaskAttempt.getID()); + launchTaskAttempt(secondMockTaskAttempt.getTaskAttemptID()); firstMockTaskAttempt.handle(new TaskAttemptEventSchedule( TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()), 10, 10)); @@ -1053,12 +1176,12 @@ public void testFailedAttemptStatus() throws InterruptedException { TaskAttemptTerminationCause.NO_PROGRESS)); firstMockTaskAttempt.handle(new TaskAttemptEventContainerTerminated(mockContainerId, - firstMockTaskAttempt.getID(), "test", TaskAttemptTerminationCause.NO_PROGRESS)); + firstMockTaskAttempt.getTaskAttemptID(), "test", TaskAttemptTerminationCause.NO_PROGRESS)); secondMockTaskAttempt.handle(new TaskAttemptEventContainerTerminated(mockContainerId, - secondMockTaskAttempt.getID(), "test", 
TaskAttemptTerminationCause.NO_PROGRESS)); - mockTask.handle(new TaskEventTAFailed(secondMockTaskAttempt.getID(), TaskFailureType.NON_FATAL, + secondMockTaskAttempt.getTaskAttemptID(), "test", TaskAttemptTerminationCause.NO_PROGRESS)); + mockTask.handle(new TaskEventTAFailed(secondMockTaskAttempt.getTaskAttemptID(), TaskFailureType.NON_FATAL, mock(TaskAttemptEvent.class))); - mockTask.handle(new TaskEventTAFailed(firstMockTaskAttempt.getID(), TaskFailureType.NON_FATAL, + mockTask.handle(new TaskEventTAFailed(firstMockTaskAttempt.getTaskAttemptID(), TaskFailureType.NON_FATAL, mock(TaskAttemptEvent.class))); assertTrue("Attempts should have failed!", firstMockTaskAttempt.getInternalState() == TaskAttemptStateInternal.FAILED @@ -1067,6 +1190,62 @@ public void testFailedAttemptStatus() throws InterruptedException { assertTrue("Task should have failed!", mockTask.getState() == TaskState.FAILED); } + @SuppressWarnings("rawtypes") + @Test (timeout = 10000L) + public void testSucceededLeafTaskWithRetroFailures() throws InterruptedException { + Configuration newConf = new Configuration(conf); + newConf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1); + Vertex vertex = mock(Vertex.class); + doReturn(new VertexImpl.VertexConfigImpl(newConf)).when(vertex).getVertexConfig(); + mockTask = new MockTaskImpl(vertexId, partition, + eventHandler, conf, taskCommunicatorManagerInterface, clock, + taskHeartbeatHandler, appContext, true, + taskResource, containerContext, vertex); + TezTaskID taskId = getNewTaskID(); + scheduleTaskAttempt(taskId); + MockTaskAttemptImpl firstMockTaskAttempt = mockTask.getAttemptList().get(0); + launchTaskAttempt(firstMockTaskAttempt.getTaskAttemptID()); + mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); + MockTaskAttemptImpl secondMockTaskAttempt = mockTask.getAttemptList().get(1); + launchTaskAttempt(secondMockTaskAttempt.getTaskAttemptID()); + + firstMockTaskAttempt.handle(new TaskAttemptEventSchedule( + TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()), 10, 10)); + secondMockTaskAttempt.handle(new TaskAttemptEventSchedule( + TezTaskAttemptID.fromString(secondMockTaskAttempt.toString()), 10, 10)); + firstMockTaskAttempt.handle(new TaskAttemptEventSubmitted( + TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()), mockContainer.getId())); + secondMockTaskAttempt.handle(new TaskAttemptEventSubmitted( + TezTaskAttemptID.fromString(secondMockTaskAttempt.toString()), mockContainer.getId())); + + secondMockTaskAttempt.handle( + new TaskAttemptEventStartedRemotely(TezTaskAttemptID.fromString(secondMockTaskAttempt.toString()))); + firstMockTaskAttempt.handle( + new TaskAttemptEventStartedRemotely(TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()))); + secondMockTaskAttempt.handle( + new TaskAttemptEvent(TezTaskAttemptID.fromString(secondMockTaskAttempt.toString()), + TaskAttemptEventType.TA_DONE)); + firstMockTaskAttempt.handle( + new TaskAttemptEventAttemptFailed(TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()), + TaskAttemptEventType.TA_FAILED, TaskFailureType.NON_FATAL, "test", + TaskAttemptTerminationCause.CONTAINER_EXITED)); + + mockTask.handle(new TaskEventTASucceeded(secondMockTaskAttempt.getTaskAttemptID())); + firstMockTaskAttempt.handle(new TaskAttemptEventContainerTerminated(mockContainerId, + firstMockTaskAttempt.getTaskAttemptID(), "test", TaskAttemptTerminationCause.NO_PROGRESS)); + + InputReadErrorEvent mockReEvent = InputReadErrorEvent.create("", 0, 0); + TezTaskAttemptID 
mockDestId = firstMockTaskAttempt.getTaskAttemptID(); + EventMetaData meta = new EventMetaData(EventProducerConsumerType.INPUT, "Vertex", "Edge", mockDestId); + TezEvent tzEvent = new TezEvent(mockReEvent, meta); + TaskAttemptEventOutputFailed outputFailedEvent = + new TaskAttemptEventOutputFailed(mockDestId, tzEvent, 1); + firstMockTaskAttempt.handle(outputFailedEvent); + mockTask.handle(new TaskEventTAFailed(firstMockTaskAttempt.getTaskAttemptID(), TaskFailureType.NON_FATAL, + mock(TaskAttemptEvent.class))); + Assert.assertEquals(TaskStateInternal.SUCCEEDED, mockTask.getInternalState()); + } + private void failAttempt(MockTaskAttemptImpl taskAttempt, int index, int expectedIncompleteAttempts) { InputReadErrorEvent mockReEvent = InputReadErrorEvent.create("", 0, index); TezTaskAttemptID mockDestId = mock(TezTaskAttemptID.class); @@ -1076,7 +1255,7 @@ private void failAttempt(MockTaskAttemptImpl taskAttempt, int index, int expecte new TaskAttemptEventOutputFailed(mockDestId, tzEvent, 1); taskAttempt.handle( outputFailedEvent); - TaskEvent tEventFail1 = new TaskEventTAFailed(taskAttempt.getID(), TaskFailureType.NON_FATAL, outputFailedEvent); + TaskEvent tEventFail1 = new TaskEventTAFailed(taskAttempt.getTaskAttemptID(), TaskFailureType.NON_FATAL, outputFailedEvent); mockTask.handle(tEventFail1); assertEquals("Unexpected number of incomplete attempts!", expectedIncompleteAttempts, mockTask.getUncompletedAttemptsCount()); @@ -1095,10 +1274,10 @@ public void testFailedTaskTransitionWithLaunchedAttempt() throws InterruptedExce TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); MockTaskAttemptImpl firstMockTaskAttempt = mockTask.getLastAttempt(); - launchTaskAttempt(firstMockTaskAttempt.getID()); - mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID())); + launchTaskAttempt(firstMockTaskAttempt.getTaskAttemptID()); + mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); MockTaskAttemptImpl secondMockTaskAttempt = mockTask.getLastAttempt(); - launchTaskAttempt(secondMockTaskAttempt.getID()); + launchTaskAttempt(secondMockTaskAttempt.getTaskAttemptID()); firstMockTaskAttempt.handle(new TaskAttemptEventSchedule( TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()), 10, 10)); @@ -1123,21 +1302,21 @@ public void testFailedTaskTransitionWithLaunchedAttempt() throws InterruptedExce TaskAttemptTerminationCause.NO_PROGRESS)); firstMockTaskAttempt.handle(new TaskAttemptEventContainerTerminated(mockContainerId, - firstMockTaskAttempt.getID(), "test", TaskAttemptTerminationCause.NO_PROGRESS)); + firstMockTaskAttempt.getTaskAttemptID(), "test", TaskAttemptTerminationCause.NO_PROGRESS)); secondMockTaskAttempt.handle(new TaskAttemptEventContainerTerminated(mockContainerId, - secondMockTaskAttempt.getID(), "test", TaskAttemptTerminationCause.NO_PROGRESS)); - mockTask.handle(new TaskEventTAFailed(secondMockTaskAttempt.getID(), TaskFailureType.NON_FATAL, + secondMockTaskAttempt.getTaskAttemptID(), "test", TaskAttemptTerminationCause.NO_PROGRESS)); + mockTask.handle(new TaskEventTAFailed(secondMockTaskAttempt.getTaskAttemptID(), TaskFailureType.NON_FATAL, mock(TaskAttemptEvent.class))); - mockTask.handle(new TaskEventTAFailed(firstMockTaskAttempt.getID(), TaskFailureType.NON_FATAL, + mockTask.handle(new TaskEventTAFailed(firstMockTaskAttempt.getTaskAttemptID(), TaskFailureType.NON_FATAL, mock(TaskAttemptEvent.class))); assertTrue("Attempts should have failed!", firstMockTaskAttempt.getInternalState() == TaskAttemptStateInternal.FAILED 
&& secondMockTaskAttempt.getInternalState() == TaskAttemptStateInternal.FAILED); assertEquals("Task should have no uncompleted attempts!", 0, mockTask.getUncompletedAttemptsCount()); assertTrue("Task should have failed!", mockTask.getState() == TaskState.FAILED); - mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); MockTaskAttemptImpl thirdMockTaskAttempt = mockTask.getLastAttempt(); - mockTask.handle(createTaskTALauncherEvent(thirdMockTaskAttempt.getID())); + mockTask.handle(createTaskTALauncherEvent(thirdMockTaskAttempt.getTaskAttemptID())); } @Test (timeout = 30000) @@ -1145,10 +1324,10 @@ public void testKilledTaskTransitionWithLaunchedAttempt() throws InterruptedExce TezTaskID taskId = getNewTaskID(); scheduleTaskAttempt(taskId); MockTaskAttemptImpl firstMockTaskAttempt = mockTask.getLastAttempt(); - launchTaskAttempt(firstMockTaskAttempt.getID()); - mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID())); + launchTaskAttempt(firstMockTaskAttempt.getTaskAttemptID()); + mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); MockTaskAttemptImpl secondMockTaskAttempt = mockTask.getLastAttempt(); - launchTaskAttempt(secondMockTaskAttempt.getID()); + launchTaskAttempt(secondMockTaskAttempt.getTaskAttemptID()); firstMockTaskAttempt.handle(new TaskAttemptEventSchedule( TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()), 10, 10)); @@ -1163,30 +1342,30 @@ public void testKilledTaskTransitionWithLaunchedAttempt() throws InterruptedExce new TaskAttemptEventStartedRemotely(TezTaskAttemptID.fromString(secondMockTaskAttempt.toString()))); firstMockTaskAttempt.handle( new TaskAttemptEventStartedRemotely(TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()))); - mockTask.handle(new TaskEventTermination(mockTask.getTaskId(), + mockTask.handle(new TaskEventTermination(mockTask.getTaskID(), TaskAttemptTerminationCause.FRAMEWORK_ERROR, "test")); secondMockTaskAttempt.handle( new TaskAttemptEventAttemptKilled(TezTaskAttemptID.fromString(secondMockTaskAttempt.toString()),"test", TaskAttemptTerminationCause.FRAMEWORK_ERROR)); - mockTask.handle(new TaskEventTAKilled(secondMockTaskAttempt.getID(), - new TaskAttemptEvent(secondMockTaskAttempt.getID(), TaskAttemptEventType.TA_KILLED))); + mockTask.handle(new TaskEventTAKilled(secondMockTaskAttempt.getTaskAttemptID(), + new TaskAttemptEvent(secondMockTaskAttempt.getTaskAttemptID(), TaskAttemptEventType.TA_KILLED))); firstMockTaskAttempt.handle( new TaskAttemptEventAttemptKilled(TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()),"test", TaskAttemptTerminationCause.FRAMEWORK_ERROR)); - mockTask.handle(new TaskEventTAKilled(firstMockTaskAttempt.getID(), - new TaskAttemptEvent(firstMockTaskAttempt.getID(), TaskAttemptEventType.TA_KILLED))); + mockTask.handle(new TaskEventTAKilled(firstMockTaskAttempt.getTaskAttemptID(), + new TaskAttemptEvent(firstMockTaskAttempt.getTaskAttemptID(), TaskAttemptEventType.TA_KILLED))); firstMockTaskAttempt.handle( new TaskAttemptEventAttemptKilled(TezTaskAttemptID.fromString(firstMockTaskAttempt.toString()),"test", TaskAttemptTerminationCause.FRAMEWORK_ERROR)); assertEquals("Task should have been killed!", mockTask.getInternalState(), TaskStateInternal.KILLED); - mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID())); + 
mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); MockTaskAttemptImpl thirdMockTaskAttempt = mockTask.getLastAttempt(); - mockTask.handle(createTaskTALauncherEvent(thirdMockTaskAttempt.getID())); - mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getID())); + mockTask.handle(createTaskTALauncherEvent(thirdMockTaskAttempt.getTaskAttemptID())); + mockTask.handle(createTaskTAAddSpecAttempt(mockTask.getLastAttempt().getTaskAttemptID())); MockTaskAttemptImpl fourthMockTaskAttempt = mockTask.getLastAttempt(); - mockTask.handle(createTaskTASucceededEvent(fourthMockTaskAttempt.getID())); + mockTask.handle(createTaskTASucceededEvent(fourthMockTaskAttempt.getTaskAttemptID())); MockTaskAttemptImpl fifthMockTaskAttempt = mockTask.getLastAttempt(); - mockTask.handle(createTaskTAFailedEvent(fifthMockTaskAttempt.getID())); + mockTask.handle(createTaskTAFailedEvent(fifthMockTaskAttempt.getTaskAttemptID())); } // TODO Add test to validate the correct commit attempt. @@ -1239,7 +1418,7 @@ public MockTaskImpl(TezVertexID vertexId, int partition, @Override protected TaskAttemptImpl createAttempt(int attemptNumber, TezTaskAttemptID schedCausalTA) { MockTaskAttemptImpl attempt = new MockTaskAttemptImpl( - TezBuilderUtils.newTaskAttemptId(getTaskId(), attemptNumber), + TezBuilderUtils.newTaskAttemptId(getTaskID(), attemptNumber), eventHandler, taskCommunicatorManagerInterface, conf, clock, taskHeartbeatHandler, appContext, true, taskResource, containerContext, schedCausalTA); @@ -1292,7 +1471,7 @@ public MockTaskAttemptImpl(TezTaskAttemptID attemptId, boolean isRescheduled, Resource resource, ContainerContext containerContext, TezTaskAttemptID schedCausalTA) { super(attemptId, eventHandler, tal, conf, clock, thh, - appContext, isRescheduled, resource, containerContext, false, null, + appContext, isRescheduled, resource, containerContext, false, mockTask, locationHint, mockTaskSpec, schedCausalTA); } @@ -1328,6 +1507,23 @@ public TaskAttemptState getStateNoLock() { public ContainerId getAssignedContainerID() { return mockContainerId; } + + @Override + public NodeId getNodeId() { + return mockNodeId; + } } + public class ServiceBusyEvent extends TezAbstractEvent + implements TaskAttemptEventTerminationCauseEvent { + public ServiceBusyEvent() { + super(TaskAttemptEventType.TA_KILLED); + } + + @Override + public TaskAttemptTerminationCause getTerminationCause() { + return TaskAttemptTerminationCause.SERVICE_BUSY; + } + } } + diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java index d382974b29..139f2fd915 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl.java @@ -20,12 +20,28 @@ import java.nio.ByteBuffer; +import org.apache.tez.common.TezUtils; +import org.apache.tez.common.security.JobTokenSecretManager; +import org.apache.tez.dag.api.NamedEntityDescriptor; +import org.apache.tez.dag.app.DAGAppMaster; +import org.apache.tez.dag.app.dag.TaskAttempt; +import org.apache.tez.dag.app.launcher.ContainerLauncherManager; +import org.apache.tez.dag.app.launcher.TezContainerLauncherImpl; +import org.apache.tez.dag.app.rm.container.AMContainer; +import org.apache.tez.serviceplugins.api.ContainerLauncherDescriptor; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import 
static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Mockito.*; +import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doCallRealMethod; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.when; import java.io.ByteArrayOutputStream; import java.io.DataInput; @@ -76,6 +92,7 @@ import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads; import org.apache.tez.test.GraceShuffleVertexManagerForTest; +import org.mockito.Mockito; import org.roaringbitmap.RoaringBitmap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -221,7 +238,6 @@ import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; -import org.mockito.Mockito; import org.mockito.internal.util.collections.Sets; import com.google.common.base.Joiner; @@ -386,7 +402,7 @@ private class TaskAttemptEventDispatcher implements EventHandler<TaskAttemptEvent> { ((EventHandler<TaskAttemptEvent>)task.getAttempt( event.getTaskAttemptID())).handle(event); @@ -399,7 +415,7 @@ private class TaskEventDispatcher implements EventHandler<TaskEvent> { @Override public void handle(TaskEvent event) { events.add(event); - VertexImpl vertex = vertexIdMap.get(event.getTaskID().getVertexID()); + VertexImpl vertex = vertexIdMap.get(event.getVertexID()); Task task = vertex.getTask(event.getTaskID()); if (task != null) { ((EventHandler<TaskEvent>)task).handle(event); @@ -437,7 +453,7 @@ private class VertexEventDispatcher @Override public void handle(VertexEvent event) { - VertexImpl vertex = vertexIdMap.get(event.getVertexId()); + VertexImpl vertex = vertexIdMap.get(event.getVertexID()); ((EventHandler<VertexEvent>) vertex).handle(event); } } @@ -793,9 +809,9 @@ private DAGPlan createDAGPlanWithNonExistVertexManager() { return dag; } - private DAGPlan createDAGPlanWithMixedEdges() { + private DAGPlan createDAGPlanWithMixedEdges(String dagName) { LOG.info("Setting up mixed edge dag plan"); - org.apache.tez.dag.api.DAG dag = org.apache.tez.dag.api.DAG.create("MixedEdges"); + org.apache.tez.dag.api.DAG dag = org.apache.tez.dag.api.DAG.create("DAG-" + dagName); org.apache.tez.dag.api.Vertex v1 = org.apache.tez.dag.api.Vertex.create("vertex1", ProcessorDescriptor.create("v1.class"), 1, Resource.newInstance(0, 0)); org.apache.tez.dag.api.Vertex v2 = org.apache.tez.dag.api.Vertex.create("vertex2", @@ -2395,12 +2411,254 @@ private DAGPlan createDAGPlanForGraceParallelism() throws IOException { .build(); } - private void setupVertices() { + /** + * The dag is of the following structure. 
+ * vertex1 vertex2 + * \ / + * vertex3 + * / \ + * vertex4 vertex5 + * \ / + * vertex6 + * @return dagPlan + */ + + public DAGPlan createDAGPlanVertexShuffleDelete() { + LOG.info("Setting up dag plan"); + DAGPlan dag = DAGPlan.newBuilder() + .setName("testverteximpl") + .setDagConf(DAGProtos.ConfigurationProto.newBuilder() + .addConfKeyValues(DAGProtos.PlanKeyValuePair.newBuilder() + .setKey(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS) + .setValue(3 + ""))) + .addVertex( + VertexPlan.newBuilder() + .setName("vertex1") + .setType(PlanVertexType.NORMAL) + .addTaskLocationHint( + PlanTaskLocationHint.newBuilder() + .addHost("host1") + .addRack("rack1") + .build() + ) + .setTaskConfig( + PlanTaskConfiguration.newBuilder() + .setNumTasks(1) + .setVirtualCores(4) + .setMemoryMb(1024) + .setJavaOpts("") + .setTaskModule("x1.y1") + .build() + ) + .setVertexConf(DAGProtos.ConfigurationProto.newBuilder() + .addConfKeyValues(DAGProtos.PlanKeyValuePair.newBuilder() + .setKey(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS) + .setValue(2 + ""))) + .addOutEdgeId("e1") + .build() + ) + .addVertex( + VertexPlan.newBuilder() + .setName("vertex2") + .setType(PlanVertexType.NORMAL) + .addTaskLocationHint( + PlanTaskLocationHint.newBuilder() + .addHost("host2") + .addRack("rack2") + .build() + ) + .setTaskConfig( + PlanTaskConfiguration.newBuilder() + .setNumTasks(2) + .setVirtualCores(4) + .setMemoryMb(1024) + .setJavaOpts("") + .setTaskModule("x2.y2") + .build() + ) + .addOutEdgeId("e2") + .build() + ) + .addVertex( + VertexPlan.newBuilder() + .setName("vertex3") + .setType(PlanVertexType.NORMAL) + .setProcessorDescriptor(TezEntityDescriptorProto.newBuilder().setClassName("x3.y3")) + .addTaskLocationHint( + PlanTaskLocationHint.newBuilder() + .addHost("host3") + .addRack("rack3") + .build() + ) + .setTaskConfig( + PlanTaskConfiguration.newBuilder() + .setNumTasks(2) + .setVirtualCores(4) + .setMemoryMb(1024) + .setJavaOpts("foo") + .setTaskModule("x3.y3") + .build() + ) + .addInEdgeId("e1") + .addInEdgeId("e2") + .addOutEdgeId("e3") + .addOutEdgeId("e4") + .build() + ) + .addVertex( + VertexPlan.newBuilder() + .setName("vertex4") + .setType(PlanVertexType.NORMAL) + .addTaskLocationHint( + PlanTaskLocationHint.newBuilder() + .addHost("host4") + .addRack("rack4") + .build() + ) + .setTaskConfig( + PlanTaskConfiguration.newBuilder() + .setNumTasks(2) + .setVirtualCores(4) + .setMemoryMb(1024) + .setJavaOpts("") + .setTaskModule("x4.y4") + .build() + ) + .addInEdgeId("e3") + .addOutEdgeId("e5") + .build() + ) + .addVertex( + VertexPlan.newBuilder() + .setName("vertex5") + .setType(PlanVertexType.NORMAL) + .addTaskLocationHint( + PlanTaskLocationHint.newBuilder() + .addHost("host5") + .addRack("rack5") + .build() + ) + .setTaskConfig( + PlanTaskConfiguration.newBuilder() + .setNumTasks(2) + .setVirtualCores(4) + .setMemoryMb(1024) + .setJavaOpts("") + .setTaskModule("x5.y5") + .build() + ) + .addInEdgeId("e4") + .addOutEdgeId("e6") + .build() + ) + .addVertex( + VertexPlan.newBuilder() + .setName("vertex6") + .setType(PlanVertexType.NORMAL) + .addTaskLocationHint( + PlanTaskLocationHint.newBuilder() + .addHost("host6") + .addRack("rack6") + .build() + ) + .setTaskConfig( + PlanTaskConfiguration.newBuilder() + .setNumTasks(2) + .setVirtualCores(4) + .setMemoryMb(1024) + .setJavaOpts("") + .setTaskModule("x6.y6") + .build() + ) + .addInEdgeId("e5") + .addInEdgeId("e6") + .build() + ) + .addEdge( + EdgePlan.newBuilder() + .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("i3_v1")) + 
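// e1: scatter-gather edge from vertex1 (output o1) into vertex3 (input i3_v1) +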
.setInputVertexName("vertex1") + .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o1")) + .setOutputVertexName("vertex3") + .setDataMovementType(PlanEdgeDataMovementType.SCATTER_GATHER) + .setId("e1") + .setDataSourceType(PlanEdgeDataSourceType.PERSISTED) + .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL) + .build() + ) + .addEdge( + EdgePlan.newBuilder() + .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("i3_v2")) + .setInputVertexName("vertex2") + .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o2")) + .setOutputVertexName("vertex3") + .setDataMovementType(PlanEdgeDataMovementType.SCATTER_GATHER) + .setId("e2") + .setDataSourceType(PlanEdgeDataSourceType.PERSISTED) + .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL) + .build() + ) + .addEdge( + EdgePlan.newBuilder() + .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("i4_v3")) + .setInputVertexName("vertex3") + .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o3_v4")) + .setOutputVertexName("vertex4") + .setDataMovementType(PlanEdgeDataMovementType.SCATTER_GATHER) + .setId("e3") + .setDataSourceType(PlanEdgeDataSourceType.PERSISTED) + .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL) + .build() + ) + .addEdge( + EdgePlan.newBuilder() + .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("i5_v3")) + .setInputVertexName("vertex3") + .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o3_v5")) + .setOutputVertexName("vertex5") + .setDataMovementType(PlanEdgeDataMovementType.SCATTER_GATHER) + .setId("e4") + .setDataSourceType(PlanEdgeDataSourceType.PERSISTED) + .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL) + .build() + ) + .addEdge( + EdgePlan.newBuilder() + .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("i6_v4")) + .setInputVertexName("vertex4") + .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o4")) + .setOutputVertexName("vertex6") + .setDataMovementType(PlanEdgeDataMovementType.SCATTER_GATHER) + .setId("e5") + .setDataSourceType(PlanEdgeDataSourceType.PERSISTED) + .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL) + .build() + ) + .addEdge( + EdgePlan.newBuilder() + .setEdgeDestination(TezEntityDescriptorProto.newBuilder().setClassName("i6_v5")) + .setInputVertexName("vertex5") + .setEdgeSource(TezEntityDescriptorProto.newBuilder().setClassName("o5")) + .setOutputVertexName("vertex6") + .setDataMovementType(PlanEdgeDataMovementType.SCATTER_GATHER) + .setId("e6") + .setDataSourceType(PlanEdgeDataSourceType.PERSISTED) + .setSchedulingType(PlanEdgeSchedulingType.SEQUENTIAL) + .build() + ) + .build(); + + return dag; + } + + private void setupVertices(boolean cleanupShuffleDataAtVertexLevel) { int vCnt = dagPlan.getVertexCount(); LOG.info("Setting up vertices from dag plan, verticesCnt=" + vCnt); vertices = new HashMap(); vertexIdMap = new HashMap(); Configuration dagConf = new Configuration(false); + dagConf.setBoolean(TezConfiguration.TEZ_AM_DAG_CLEANUP_ON_COMPLETION, true); + conf.setInt(TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT, cleanupShuffleDataAtVertexLevel ? 
1 : 0); dagConf.set("abc", "foobar"); for (int i = 0; i < vCnt; ++i) { VertexPlan vPlan = dagPlan.getVertex(i); @@ -2447,7 +2705,6 @@ private void parseVertexEdges() { Map<Vertex, Edge> outVertices = new HashMap<Vertex, Edge>(); - for(String inEdgeId : vertexPlan.getInEdgeIdList()){ EdgePlan edgePlan = edgePlans.get(inEdgeId); Vertex inVertex = this.vertices.get(edgePlan.getInputVertexName()); @@ -2472,8 +2729,14 @@ private void parseVertexEdges() { + ", outputVerticesCnt=" + outVertices.size()); vertex.setOutputVertices(outVertices); } + + for (Map.Entry<String, VertexImpl> vertex : vertices.entrySet()) { + VertexImpl vertexImpl = vertex.getValue(); + vertexImpl.initShuffleDeletionContext(2); + } } + public void setupPreDagCreation() { LOG.info("____________ RESETTING CURRENT DAG ____________"); conf = new Configuration(); @@ -2483,13 +2746,14 @@ public void setupPreDagCreation() { dagId = TezDAGID.getInstance(appAttemptId.getApplicationId(), 1); taskSpecificLaunchCmdOption = mock(TaskSpecificLaunchCmdOption.class); doReturn(false).when(taskSpecificLaunchCmdOption).addTaskSpecificLaunchCmdOption( - any(String.class), + any(), anyInt()); } @SuppressWarnings({ "unchecked", "rawtypes" }) - public void setupPostDagCreation() throws TezException { + public void setupPostDagCreation(boolean cleanupShuffleDataAtVertexLevel) throws TezException { String dagName = "dag0"; + taskCommunicatorManagerInterface = mock(TaskCommunicatorManagerInterface.class); // dispatcher may be created multiple times (setupPostDagCreation may be called multiple times) if (dispatcher != null) { dispatcher.stop(); @@ -2499,6 +2763,40 @@ public void setupPostDagCreation() throws TezException { when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim()); when(appContext.getContainerLauncherName(anyInt())).thenReturn( TezConstants.getTezYarnServicePluginName()); + DAGAppMaster mockDagAppMaster = mock(DAGAppMaster.class); + when(appContext.getAppMaster()).thenReturn(mockDagAppMaster); + doCallRealMethod().when(mockDagAppMaster).vertexComplete(any(TezVertexID.class), any(Set.class)); + List<NamedEntityDescriptor> containerDescriptors = new ArrayList<>(); + ContainerLauncherDescriptor containerLaunchers = + ContainerLauncherDescriptor.create("ContainerLaunchers", + TezContainerLauncherImpl.class.getName()); + conf.setBoolean(TezConfiguration.TEZ_AM_DAG_CLEANUP_ON_COMPLETION, true); + conf.set(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, "tez_shuffle"); + conf.setInt(TezConfiguration.TEZ_AM_VERTEX_CLEANUP_HEIGHT, 0); + try { + containerLaunchers.setUserPayload(UserPayload.create( + TezUtils.createByteStringFromConf(conf).asReadOnlyByteBuffer())); + } catch (IOException e) { + e.printStackTrace(); + } + containerDescriptors.add(containerLaunchers); + ContainerLauncherManager mockContainerLauncherManager = spy(new ContainerLauncherManager(appContext, + taskCommunicatorManagerInterface, "test", containerDescriptors, false)); + doCallRealMethod().when(mockContainerLauncherManager).vertexComplete(any( + TezVertexID.class), any(JobTokenSecretManager.class + ), any(Set.class)); + when(appContext.getAppMaster().getContainerLauncherManager()).thenReturn( + mockContainerLauncherManager); + mockContainerLauncherManager.init(conf); + mockContainerLauncherManager.start(); + AMContainerMap amContainerMap = mock(AMContainerMap.class); + AMContainer amContainer = mock(AMContainer.class); + Container mockContainer = mock(Container.class); + when(amContainer.getContainer()).thenReturn(mockContainer); + when(mockContainer.getNodeId()).thenReturn(mock(NodeId.class)); + 
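// The node HTTP address is stubbed so the shuffle cleanup path has a concrete node to address (presumably where deletion requests would be sent) +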
when(mockContainer.getNodeHttpAddress()).thenReturn("localhost:12345"); + when(amContainerMap.get(any(ContainerId.class))).thenReturn(amContainer); + when(appContext.getAllContainers()).thenReturn(amContainerMap); thh = mock(TaskHeartbeatHandler.class); historyEventHandler = mock(HistoryEventHandler.class); @@ -2525,7 +2823,7 @@ public void setupPostDagCreation() throws TezException { execService = mock(ListeningExecutorService.class); final ListenableFuture mockFuture = mock(ListenableFuture.class); - Mockito.doAnswer(new Answer() { + doAnswer(new Answer() { public ListenableFuture answer(InvocationOnMock invocation) { Object[] args = invocation.getArguments(); CallableEvent e = (CallableEvent) args[0]; @@ -2557,7 +2855,7 @@ public ListenableFuture answer(InvocationOnMock invocation) { updateTracker.stop(); } updateTracker = new StateChangeNotifierForTest(appContext.getCurrentDAG()); - setupVertices(); + setupVertices(cleanupShuffleDataAtVertexLevel); when(dag.getVertex(any(TezVertexID.class))).thenAnswer(new Answer() { @Override public Vertex answer(InvocationOnMock invocation) throws Throwable { @@ -2622,7 +2920,7 @@ public void setup() throws TezException { setupPreDagCreation(); dagPlan = createTestDAGPlan(); invalidDagPlan = createInvalidDAGPlan(); - setupPostDagCreation(); + setupPostDagCreation(false); } @After @@ -2750,7 +3048,7 @@ public void testVertexInit() throws AMUserCodeException { public void testNonExistVertexManager() throws TezException { setupPreDagCreation(); dagPlan = createDAGPlanWithNonExistVertexManager(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl v1 = vertices.get("vertex1"); v1.handle(new VertexEvent(v1.getVertexId(), VertexEventType.V_INIT)); Assert.assertEquals(VertexState.FAILED, v1.getState()); @@ -2763,7 +3061,7 @@ public void testNonExistVertexManager() throws TezException { public void testNonExistInputInitializer() throws TezException { setupPreDagCreation(); dagPlan = createDAGPlanWithNonExistInputInitializer(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl v1 = vertices.get("vertex1"); v1.handle(new VertexEvent(v1.getVertexId(), VertexEventType.V_INIT)); Assert.assertEquals(VertexState.FAILED, v1.getState()); @@ -2776,7 +3074,7 @@ public void testNonExistInputInitializer() throws TezException { public void testNonExistOutputCommitter() throws TezException { setupPreDagCreation(); dagPlan = createDAGPlanWithNonExistOutputCommitter(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl v1 = vertices.get("vertex1"); v1.handle(new VertexEvent(v1.getVertexId(), VertexEventType.V_INIT)); Assert.assertEquals(VertexState.FAILED, v1.getState()); @@ -2815,7 +3113,7 @@ public void testVertexConfigureEventWithReconfigure() throws Exception { setupPreDagCreation(); // initialize() will make VM call planned() and started() will make VM call done() dagPlan = createDAGPlanWithVMException("TestVMStateUpdate", VMExceptionLocation.NoExceptionDoReconfigure); - setupPostDagCreation(); + setupPostDagCreation(false); TestUpdateListener listener = new TestUpdateListener(); updateTracker @@ -2842,7 +3140,7 @@ public void testVertexConfigureEventWithReconfigure() throws Exception { Assert.assertEquals("vertex2", listener.events.get(0).getVertexName()); Assert.assertEquals(org.apache.tez.dag.api.event.VertexState.CONFIGURED, listener.events.get(0).getVertexState()); - updateTracker.unregisterForVertexUpdates("vertex2", listener); + updateTracker.unregisterForVertexUpdates("vertex2", listener); } @Test (timeout=5000) 
@@ -3155,7 +3453,7 @@ private void checkTasks(Vertex v, int numTasks) { int i = 0; // iteration maintains order due to linked hash map for(Task task : tasks.values()) { - Assert.assertEquals(i, task.getTaskId().getId()); + Assert.assertEquals(i, task.getTaskID().getId()); i++; } } @@ -3638,7 +3936,7 @@ public void testVertexTaskAttemptProcessorFailure() throws Exception { startVertex(v); dispatcher.await(); TaskAttemptImpl ta = (TaskAttemptImpl) v.getTask(0).getAttempts().values().iterator().next(); - ta.handle(new TaskAttemptEventSchedule(ta.getID(), 2, 2)); + ta.handle(new TaskAttemptEventSchedule(ta.getTaskAttemptID(), 2, 2)); NodeId nid = NodeId.newInstance("127.0.0.1", 0); ContainerId contId = ContainerId.newInstance(appAttemptId, 3); @@ -3652,10 +3950,10 @@ public void testVertexTaskAttemptProcessorFailure() throws Exception { containers.addContainerIfNew(container, 0, 0, 0); doReturn(containers).when(appContext).getAllContainers(); - ta.handle(new TaskAttemptEventSubmitted(ta.getID(), contId)); - ta.handle(new TaskAttemptEventStartedRemotely(ta.getID())); + ta.handle(new TaskAttemptEventSubmitted(ta.getTaskAttemptID(), contId)); + ta.handle(new TaskAttemptEventStartedRemotely(ta.getTaskAttemptID())); Assert.assertEquals(TaskAttemptStateInternal.RUNNING, ta.getInternalState()); - ta.handle(new TaskAttemptEventAttemptFailed(ta.getID(), TaskAttemptEventType.TA_FAILED, + ta.handle(new TaskAttemptEventAttemptFailed(ta.getTaskAttemptID(), TaskAttemptEventType.TA_FAILED, TaskFailureType.NON_FATAL, "diag", TaskAttemptTerminationCause.APPLICATION_ERROR)); dispatcher.await(); @@ -3673,7 +3971,7 @@ public void testVertexTaskAttemptInputFailure() throws Exception { startVertex(v); dispatcher.await(); TaskAttemptImpl ta = (TaskAttemptImpl) v.getTask(0).getAttempts().values().iterator().next(); - ta.handle(new TaskAttemptEventSchedule(ta.getID(), 2, 2)); + ta.handle(new TaskAttemptEventSchedule(ta.getTaskAttemptID(), 2, 2)); NodeId nid = NodeId.newInstance("127.0.0.1", 0); ContainerId contId = ContainerId.newInstance(appAttemptId, 3); @@ -3687,11 +3985,11 @@ public void testVertexTaskAttemptInputFailure() throws Exception { containers.addContainerIfNew(container, 0, 0, 0); doReturn(containers).when(appContext).getAllContainers(); - ta.handle(new TaskAttemptEventSubmitted(ta.getID(), contId)); - ta.handle(new TaskAttemptEventStartedRemotely(ta.getID())); + ta.handle(new TaskAttemptEventSubmitted(ta.getTaskAttemptID(), contId)); + ta.handle(new TaskAttemptEventStartedRemotely(ta.getTaskAttemptID())); Assert.assertEquals(TaskAttemptStateInternal.RUNNING, ta.getInternalState()); - ta.handle(new TaskAttemptEventAttemptFailed(ta.getID(), TaskAttemptEventType.TA_FAILED, + ta.handle(new TaskAttemptEventAttemptFailed(ta.getTaskAttemptID(), TaskAttemptEventType.TA_FAILED, TaskFailureType.NON_FATAL, "diag", TaskAttemptTerminationCause.INPUT_READ_ERROR)); dispatcher.await(); @@ -3710,7 +4008,7 @@ public void testVertexTaskAttemptOutputFailure() throws Exception { startVertex(v); dispatcher.await(); TaskAttemptImpl ta = (TaskAttemptImpl) v.getTask(0).getAttempts().values().iterator().next(); - ta.handle(new TaskAttemptEventSchedule(ta.getID(), 2, 2)); + ta.handle(new TaskAttemptEventSchedule(ta.getTaskAttemptID(), 2, 2)); NodeId nid = NodeId.newInstance("127.0.0.1", 0); ContainerId contId = ContainerId.newInstance(appAttemptId, 3); @@ -3724,11 +4022,11 @@ public void testVertexTaskAttemptOutputFailure() throws Exception { containers.addContainerIfNew(container, 0, 0, 0); 
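+ // Registering the container with the AppContext lets the attempt look it up during the submit/start transitions below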
doReturn(containers).when(appContext).getAllContainers(); - ta.handle(new TaskAttemptEventSubmitted(ta.getID(), contId)); - ta.handle(new TaskAttemptEventStartedRemotely(ta.getID())); + ta.handle(new TaskAttemptEventSubmitted(ta.getTaskAttemptID(), contId)); + ta.handle(new TaskAttemptEventStartedRemotely(ta.getTaskAttemptID())); Assert.assertEquals(TaskAttemptStateInternal.RUNNING, ta.getInternalState()); - ta.handle(new TaskAttemptEventAttemptFailed(ta.getID(), TaskAttemptEventType.TA_FAILED, + ta.handle(new TaskAttemptEventAttemptFailed(ta.getTaskAttemptID(), TaskAttemptEventType.TA_FAILED, TaskFailureType.NON_FATAL, "diag", TaskAttemptTerminationCause.OUTPUT_WRITE_ERROR)); dispatcher.await(); @@ -3824,7 +4122,7 @@ public void testFailuresMaxPercentSourceTaskAttemptCompletionEvents() throws Tez conf.setFloat(TezConfiguration.TEZ_VERTEX_FAILURES_MAXPERCENT, 50.0f); conf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1); dagPlan = createTestDAGPlan(); - setupPostDagCreation(); + setupPostDagCreation(false); initAllVertices(VertexState.INITED); VertexImpl v4 = vertices.get("vertex4"); @@ -3879,7 +4177,7 @@ public void testFailuresMaxPercentExceededSourceTaskAttemptCompletionEvents() th conf.setFloat(TezConfiguration.TEZ_VERTEX_FAILURES_MAXPERCENT, 50.0f); conf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1); dagPlan = createTestDAGPlan(); - setupPostDagCreation(); + setupPostDagCreation(false); initAllVertices(VertexState.INITED); VertexImpl v4 = vertices.get("vertex4"); @@ -3978,7 +4276,7 @@ public void testTaskReschedule() { public void testTerminatingVertexForTaskComplete() throws Exception { setupPreDagCreation(); dagPlan = createSamplerDAGPlan(false); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl vertex = spy(vertices.get("A")); initVertex(vertex); startVertex(vertex); @@ -3996,7 +4294,7 @@ public void testTerminatingVertexForTaskComplete() throws Exception { public void testTerminatingVertexForVComplete() throws Exception { setupPreDagCreation(); dagPlan = createSamplerDAGPlan(false); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl vertex = spy(vertices.get("A")); initVertex(vertex); startVertex(vertex); @@ -4251,7 +4549,7 @@ public void testBadCommitter2() throws Exception { public void testVertexInitWithCustomVertexManager() throws Exception { setupPreDagCreation(); dagPlan = createDAGWithCustomVertexManager(); - setupPostDagCreation(); + setupPostDagCreation(false); int numTasks = 3; VertexImpl v1 = vertices.get("v1"); @@ -4304,8 +4602,8 @@ public void testVertexInitWithCustomVertexManager() throws Exception { @Test(timeout = 5000) public void testVertexManagerHeuristic() throws TezException { setupPreDagCreation(); - dagPlan = createDAGPlanWithMixedEdges(); - setupPostDagCreation(); + dagPlan = createDAGPlanWithMixedEdges("testVertexManagerHeuristic"); + setupPostDagCreation(false); initAllVertices(VertexState.INITED); Assert.assertEquals(ImmediateStartVertexManager.class, vertices.get("vertex1").getVertexManager().getPlugin().getClass()); @@ -4330,7 +4628,7 @@ public void testVertexWithOneToOneSplit() throws Exception { useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanForOneToOneSplit("TestInputInitializer", -1, true); - setupPostDagCreation(); + setupPostDagCreation(false); int numTasks = 5; VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices @@ -4397,7 +4695,7 @@ public void testVertexWithOneToOneSplitWhileRunning() throws Exception { // 
create a diamond shaped dag with 1-1 edges. setupPreDagCreation(); dagPlan = createDAGPlanForOneToOneSplit(null, numTasks, false); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl v1 = vertices.get("vertex1"); v1.vertexReconfigurationPlanned(); initAllVertices(VertexState.INITED); @@ -4436,7 +4734,7 @@ public void testVertexWithOneToOneSplitWhileInited() throws Exception { // create a diamond shaped dag with 1-1 edges. setupPreDagCreation(); dagPlan = createDAGPlanForOneToOneSplit(null, numTasks, false); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl v1 = vertices.get("vertex1"); v1.vertexReconfigurationPlanned(); initAllVertices(VertexState.INITED); @@ -4478,7 +4776,7 @@ public void testVertexVMErrorReport() throws Exception { // create a diamond shaped dag with 1-1 edges. setupPreDagCreation(); dagPlan = createDAGPlanForOneToOneSplit(null, numTasks, false); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl v1 = vertices.get("vertex1"); initAllVertices(VertexState.INITED); @@ -4522,7 +4820,7 @@ public void testVertexWithInitializerFailure() throws Exception { useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithInputInitializer("TestInputInitializer"); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices .get("vertex1"); @@ -4567,7 +4865,7 @@ public void testVertexWithInitializerParallelismSetTo0() throws InterruptedExcep setupPreDagCreation(); dagPlan = createDAGPlanWithInitializer0Tasks(RootInitializerSettingParallelismTo0.class.getName()); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl v1 = vertices.get("vertex1"); VertexImpl v2 = vertices.get("vertex2"); @@ -4615,7 +4913,7 @@ public void testInputInitializerVertexStateUpdates() throws Exception { initializer.setNumVertexStateUpdateEvents(3); setupPreDagCreation(); dagPlan = createDAGPlanWithRunningInitializer(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithRunningInputInitializer v1 = (VertexImplWithRunningInputInitializer) vertices.get("vertex1"); @@ -4650,7 +4948,7 @@ public void testInputInitializerEventMultipleAttempts() throws Exception { (EventHandlingRootInputInitializer) customInitializer; setupPreDagCreation(); dagPlan = createDAGPlanWithRunningInitializer4(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithRunningInputInitializer v1 = (VertexImplWithRunningInputInitializer) vertices.get("vertex1"); @@ -4738,7 +5036,7 @@ public void testInputInitializerEventsMultipleSources() throws Exception { initializer.setNumExpectedEvents(4); setupPreDagCreation(); dagPlan = createDAGPlanWithRunningInitializer4(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithRunningInputInitializer v1 = (VertexImplWithRunningInputInitializer) vertices.get("vertex1"); @@ -4800,7 +5098,7 @@ public void testInputInitializerEventsMultipleSources() throws Exception { Assert.assertEquals(2, v2.getTotalTasks()); // Generate events from v2 to v3's initializer. 
1 from task 0, 2 from task 1 for (Task task : v2.getTasks().values()) { - TezTaskID taskId = task.getTaskId(); + TezTaskID taskId = task.getTaskID(); TezTaskAttemptID attemptId = TezTaskAttemptID.getInstance(taskId, 0); int numEventsFromTask = taskId.getId() + 1; for (int i = 0; i < numEventsFromTask; i++) { @@ -4861,7 +5159,7 @@ public void testInputInitializerEventNoDirectConnection() throws Exception { (EventHandlingRootInputInitializer) customInitializer; setupPreDagCreation(); dagPlan = createDAGPlanWithRunningInitializer4(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithRunningInputInitializer v1 = (VertexImplWithRunningInputInitializer) vertices.get("vertex1"); @@ -4941,7 +5239,7 @@ public void testInputInitializerEventsAtNew() throws Exception { (EventHandlingRootInputInitializer) customInitializer; setupPreDagCreation(); dagPlan = createDAGPlanWithRunningInitializer3(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithRunningInputInitializer v1 = (VertexImplWithRunningInputInitializer) vertices.get("vertex1"); @@ -5027,7 +5325,7 @@ public void testInputInitializerEvents() throws Exception { (EventHandlingRootInputInitializer) customInitializer; setupPreDagCreation(); dagPlan = createDAGPlanWithRunningInitializer(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithRunningInputInitializer v1 = (VertexImplWithRunningInputInitializer) vertices.get("vertex1"); @@ -5104,7 +5402,7 @@ public void testInputInitializerEvents() throws Exception { public void testTaskSchedulingWithCustomEdges() throws TezException { setupPreDagCreation(); dagPlan = createCustomDAGWithCustomEdges(); - setupPostDagCreation(); + setupPostDagCreation(false); /** * @@ -5402,7 +5700,7 @@ public void testVertexWithMultipleInitializers1() throws Exception { useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithMultipleInitializers("TestInputInitializer"); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices .get("vertex1"); @@ -5432,7 +5730,7 @@ public void testVertexWithMultipleInitializers2() throws Exception { useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithMultipleInitializers("TestInputInitializer"); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices .get("vertex1"); @@ -5462,7 +5760,7 @@ public void testVertexWithInitializerSuccess() throws Exception { useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithInputInitializer("TestInputInitializer"); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices .get("vertex1"); @@ -5499,7 +5797,7 @@ public void testVertexWithInitializerSuccess() throws Exception { for (int i=0; i> inputs) { - this.inputs = inputs; + List> inputList) { + this.inputs = inputList; } @Override @@ -5983,7 +6313,7 @@ public void completeInputInitialization(int initializerIndex, int targetTasks, public void testVertexGroupInput() throws TezException { setupPreDagCreation(); dagPlan = createVertexGroupDAGPlan(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl vA = vertices.get("A"); VertexImpl vB = vertices.get("B"); @@ -6012,7 +6342,7 @@ public void testStartWithUninitializedCustomEdge() throws Exception { // been 
initialized setupPreDagCreation(); dagPlan = createSamplerDAGPlan(true); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl vA = vertices.get("A"); VertexImpl vB = vertices.get("B"); @@ -6061,7 +6391,7 @@ public void testVertexConfiguredDoneByVMBeforeEdgeDefined() throws Exception { // been initialized setupPreDagCreation(); dagPlan = createSamplerDAGPlan(true); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl vA = vertices.get("A"); VertexImpl vB = vertices.get("B"); @@ -6135,7 +6465,7 @@ public void testInitStartRace() throws TezException { // been initialized setupPreDagCreation(); dagPlan = createSamplerDAGPlan(false); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl vA = vertices.get("A"); VertexImpl vB = vertices.get("B"); @@ -6158,7 +6488,7 @@ public void testInitStartRace2() throws TezException { // been initialized setupPreDagCreation(); dagPlan = createSamplerDAGPlan2(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl vA = vertices.get("A"); VertexImpl vB = vertices.get("B"); @@ -6183,7 +6513,7 @@ public void testInitStartRace2() throws TezException { public void testTez2684() throws IOException, TezException { setupPreDagCreation(); dagPlan = createSamplerDAGPlan2(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl vA = vertices.get("A"); VertexImpl vB = vertices.get("B"); @@ -6223,7 +6553,7 @@ public void testTez2684() throws IOException, TezException { public void testVertexGraceParallelism() throws IOException, TezException { setupPreDagCreation(); dagPlan = createDAGPlanForGraceParallelism(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl vA = vertices.get("A"); VertexImpl vB = vertices.get("B"); @@ -6291,7 +6621,7 @@ public void testVMEventBeforeVertexInitialized() throws Exception { useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithCountingVM(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImpl v1 = vertices.get("vertex1"); VertexImpl v2 = vertices.get("vertex2"); @@ -6348,7 +6678,7 @@ public void testExceptionFromVM_Initialize() throws TezException { useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithVMException("TestInputInitializer", VMExceptionLocation.Initialize); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices .get("vertex1"); @@ -6367,7 +6697,7 @@ public void testExceptionFromVM_OnRootVertexInitialized() throws Exception { useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithVMException("TestInputInitializer", VMExceptionLocation.OnRootVertexInitialized); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices .get("vertex1"); @@ -6391,7 +6721,7 @@ public void testExceptionFromVM_OnVertexStarted() throws Exception { useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithVMException("TestInputInitializer", VMExceptionLocation.OnVertexStarted); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices .get("vertex1"); @@ -6418,7 +6748,7 @@ public void testExceptionFromVM_OnSourceTaskCompleted() throws Exception { useCustomInitializer = true; setupPreDagCreation(); dagPlan = 
createDAGPlanWithVMException("TestInputInitializer", VMExceptionLocation.OnSourceTaskCompleted); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices .get("vertex1"); @@ -6454,7 +6784,7 @@ public void testExceptionFromVM_OnVertexManagerEventReceived() throws Exception useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithVMException("TestInputInitializer", VMExceptionLocation.OnVertexManagerEventReceived); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices .get("vertex1"); @@ -6482,7 +6812,7 @@ public void testExceptionFromVM_OnVertexManagerVertexStateUpdated() throws Excep useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithVMException("TestVMStateUpdate", VMExceptionLocation.OnVertexManagerVertexStateUpdated); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager) vertices .get("vertex1"); @@ -6511,7 +6841,7 @@ public void testExceptionFromII_Initialize() throws InterruptedException, TezExc (EventHandlingRootInputInitializer) customInitializer; setupPreDagCreation(); dagPlan = createDAGPlanWithIIException(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithRunningInputInitializer v1 = (VertexImplWithRunningInputInitializer) vertices.get("vertex1"); @@ -6532,7 +6862,7 @@ public void testExceptionFromII_InitFailedAfterInitialized() throws Exception { useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithIIException(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager)vertices.get("vertex1"); @@ -6556,7 +6886,7 @@ public void testExceptionFromII_InitFailedAfterRunning() throws Exception { useCustomInitializer = true; setupPreDagCreation(); dagPlan = createDAGPlanWithIIException(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithControlledInitializerManager v1 = (VertexImplWithControlledInitializerManager)vertices.get("vertex1"); @@ -6584,7 +6914,7 @@ public void testExceptionFromII_HandleInputInitializerEvent() throws Exception { (EventHandlingRootInputInitializer) customInitializer; setupPreDagCreation(); dagPlan = createDAGPlanWithRunningInitializer(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithRunningInputInitializer v1 = (VertexImplWithRunningInputInitializer) vertices.get("vertex1"); @@ -6634,7 +6964,7 @@ public void testExceptionFromII_OnVertexStateUpdated() throws InterruptedExcepti (EventHandlingRootInputInitializer) customInitializer; setupPreDagCreation(); dagPlan = createDAGPlanWithRunningInitializer(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithRunningInputInitializer v1 = (VertexImplWithRunningInputInitializer) vertices.get("vertex1"); @@ -6663,7 +6993,7 @@ public void testExceptionFromII_InitSucceededAfterInitFailure() throws Interrupt (EventHandlingRootInputInitializer) customInitializer; setupPreDagCreation(); dagPlan = createDAGPlanWithRunningInitializer(); - setupPostDagCreation(); + setupPostDagCreation(false); VertexImplWithRunningInputInitializer v1 = (VertexImplWithRunningInputInitializer) vertices.get("vertex1"); @@ -7136,8 +7466,8 @@ public void testLastTaskFinishTime() { TaskImpl task0 = 
(TaskImpl) v.getTask(tid0); TaskImpl task1 = (TaskImpl) v.getTask(tid1); - TezTaskAttemptID taskAttemptId0 = TezTaskAttemptID.getInstance(task0.getTaskId(), 0); - TezTaskAttemptID taskAttemptId1 = TezTaskAttemptID.getInstance(task1.getTaskId(), 0); + TezTaskAttemptID taskAttemptId0 = TezTaskAttemptID.getInstance(task0.getTaskID(), 0); + TezTaskAttemptID taskAttemptId1 = TezTaskAttemptID.getInstance(task1.getTaskID(), 0); TaskAttemptImpl taskAttempt0 = (TaskAttemptImpl) task0.getAttempt(taskAttemptId0); TaskAttemptImpl taskAttempt1 = (TaskAttemptImpl) task1.getAttempt(taskAttemptId1); @@ -7169,7 +7499,7 @@ public void testPickupDagLocalResourceOnScheduleTask() { VertexImpl v1 = vertices.get("vertex1"); startVertex(v1); - TezTaskAttemptID taskAttemptId0 = TezTaskAttemptID.getInstance(v1.getTask(0).getTaskId(), 0); + TezTaskAttemptID taskAttemptId0 = TezTaskAttemptID.getInstance(v1.getTask(0).getTaskID(), 0); TaskAttemptImpl ta0 = (TaskAttemptImpl) v1.getTask(0).getAttempt(taskAttemptId0); ta0.handle(new TaskAttemptEventSchedule(taskAttemptId0, 1, 1)); @@ -7180,4 +7510,131 @@ public void testPickupDagLocalResourceOnScheduleTask() { Assert.assertTrue(localResourceMap.containsKey("dag lr")); Assert.assertTrue(localResourceMap.containsKey("vertex lr")); } + + @Test + public void testVertexShuffleDelete() throws Exception { + setupPreDagCreation(); + dagPlan = createDAGPlanVertexShuffleDelete(); + setupPostDagCreation(true); + checkSpannedVertices(); + runVertices(); + Mockito.verify(appContext.getAppMaster().getContainerLauncherManager(), + times(3)).vertexComplete(any(), any(), any()); + } + + private void checkSpannedVertices() { + // vertex1 should have 0 ancestors and 2 children at height = 2 + VertexImpl v1 = vertices.get("vertex1"); + checkResults(v1.vShuffleDeletionContext.getAncestors(), new ArrayList<>()); + checkResults(v1.vShuffleDeletionContext.getChildren(), Arrays.asList("vertex5", "vertex4")); + + // vertex2 should have 0 ancestors and 2 children at height = 2 + VertexImpl v2 = vertices.get("vertex2"); + checkResults(v2.vShuffleDeletionContext.getAncestors(), new ArrayList<>()); + checkResults(v2.vShuffleDeletionContext.getChildren(), Arrays.asList("vertex5", "vertex4")); + + // vertex3 should have 0 ancestors and 1 child at height = 2 + VertexImpl v3 = vertices.get("vertex3"); + checkResults(v3.vShuffleDeletionContext.getAncestors(), new ArrayList<>()); + checkResults(v3.vShuffleDeletionContext.getChildren(), Arrays.asList("vertex6")); + + // vertex4 should have 2 ancestors and 0 children at height = 2 + VertexImpl v4 = vertices.get("vertex4"); + checkResults(v4.vShuffleDeletionContext.getAncestors(), Arrays.asList("vertex1", "vertex2")); + checkResults(v4.vShuffleDeletionContext.getChildren(), new ArrayList<>()); + + // vertex5 should have 2 ancestors and 0 children at height = 2 + VertexImpl v5 = vertices.get("vertex5"); + checkResults(v5.vShuffleDeletionContext.getAncestors(), Arrays.asList("vertex1", "vertex2")); + checkResults(v5.vShuffleDeletionContext.getChildren(), new ArrayList<>()); + + // vertex6 should have 1 ancestor and 0 children at height = 2 + VertexImpl v6 = vertices.get("vertex6"); + checkResults(v6.vShuffleDeletionContext.getAncestors(), Arrays.asList("vertex3")); + checkResults(v6.vShuffleDeletionContext.getChildren(), new ArrayList<>()); + } + + private void checkResults(Set<Vertex> actual, List<String> expected) { + assertEquals(actual.size(), expected.size()); + for (Vertex vertex : actual) { + assertTrue(expected.contains(vertex.getName())); + } + } + + private void 
runVertices() { + VertexImpl v1 = vertices.get("vertex1"); + VertexImpl v2 = vertices.get("vertex2"); + VertexImpl v3 = vertices.get("vertex3"); + VertexImpl v4 = vertices.get("vertex4"); + VertexImpl v5 = vertices.get("vertex5"); + VertexImpl v6 = vertices.get("vertex6"); + dispatcher.getEventHandler().handle(new VertexEvent(v1.getVertexId(), VertexEventType.V_INIT)); + dispatcher.getEventHandler().handle(new VertexEvent(v2.getVertexId(), VertexEventType.V_INIT)); + dispatcher.await(); + dispatcher.getEventHandler().handle(new VertexEvent(v1.getVertexId(), VertexEventType.V_START)); + dispatcher.getEventHandler().handle(new VertexEvent(v2.getVertexId(), VertexEventType.V_START)); + dispatcher.await(); + + TezTaskID v1t1 = TezTaskID.getInstance(v1.getVertexId(), 0); + Map<TezTaskAttemptID, TaskAttempt> attempts = v1.getTask(v1t1).getAttempts(); + startAttempts(attempts); + v1.handle(new VertexEventTaskCompleted(v1t1, TaskState.SUCCEEDED)); + TezTaskID v2t1 = TezTaskID.getInstance(v2.getVertexId(), 0); + attempts = v2.getTask(v2t1).getAttempts(); + startAttempts(attempts); + v2.handle(new VertexEventTaskCompleted(v2t1, TaskState.SUCCEEDED)); + TezTaskID v2t2 = TezTaskID.getInstance(v2.getVertexId(), 1); + attempts = v2.getTask(v2t2).getAttempts(); + startAttempts(attempts); + v2.handle(new VertexEventTaskCompleted(v2t2, TaskState.SUCCEEDED)); + TezTaskID v3t1 = TezTaskID.getInstance(v3.getVertexId(), 0); + v3.scheduleTasks(Lists.newArrayList(ScheduleTaskRequest.create(0, null))); + dispatcher.await(); + attempts = v3.getTask(v3t1).getAttempts(); + startAttempts(attempts); + v3.handle(new VertexEventTaskCompleted(v3t1, TaskState.SUCCEEDED)); + TezTaskID v3t2 = TezTaskID.getInstance(v3.getVertexId(), 1); + attempts = v3.getTask(v3t2).getAttempts(); + startAttempts(attempts); + v3.handle(new VertexEventTaskCompleted(v3t2, TaskState.SUCCEEDED)); + dispatcher.await(); + TezTaskID v4t1 = TezTaskID.getInstance(v4.getVertexId(), 0); + attempts = v4.getTask(v4t1).getAttempts(); + startAttempts(attempts); + v4.handle(new VertexEventTaskCompleted(v4t1, TaskState.SUCCEEDED)); + TezTaskID v4t2 = TezTaskID.getInstance(v4.getVertexId(), 1); + attempts = v4.getTask(v4t2).getAttempts(); + startAttempts(attempts); + v4.handle(new VertexEventTaskCompleted(v4t2, TaskState.SUCCEEDED)); + TezTaskID v5t1 = TezTaskID.getInstance(v5.getVertexId(), 0); + attempts = v5.getTask(v5t1).getAttempts(); + startAttempts(attempts); + v5.handle(new VertexEventTaskCompleted(v5t1, TaskState.SUCCEEDED)); + TezTaskID v5t2 = TezTaskID.getInstance(v5.getVertexId(), 1); + attempts = v5.getTask(v5t2).getAttempts(); + startAttempts(attempts); + v5.handle(new VertexEventTaskCompleted(v5t2, TaskState.SUCCEEDED)); + TezTaskID v6t1 = TezTaskID.getInstance(v6.getVertexId(), 0); + attempts = v6.getTask(v6t1).getAttempts(); + startAttempts(attempts); + v6.handle(new VertexEventTaskCompleted(v6t1, TaskState.SUCCEEDED)); + TezTaskID v6t2 = TezTaskID.getInstance(v6.getVertexId(), 1); + attempts = v6.getTask(v6t2).getAttempts(); + startAttempts(attempts); + v6.handle(new VertexEventTaskCompleted(v6t2, TaskState.SUCCEEDED)); + dispatcher.await(); + } + + private void startAttempts(Map<TezTaskAttemptID, TaskAttempt> attempts) { + for (Map.Entry<TezTaskAttemptID, TaskAttempt> entry : attempts.entrySet()) { + TezTaskAttemptID id = entry.getKey(); + TaskAttemptImpl taskAttempt = (TaskAttemptImpl)entry.getValue(); + taskAttempt.handle(new TaskAttemptEventSchedule(id, 10, 10)); + dispatcher.await(); + ContainerId mockContainer = mock(ContainerId.class, RETURNS_DEEP_STUBS); + taskAttempt.handle(new TaskAttemptEventSubmitted(id, mockContainer)); 
+ taskAttempt.handle(new TaskAttemptEventStartedRemotely(id)); + dispatcher.await(); + } + } } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexManager.java index 3d9f2714af..058dc76164 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexManager.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexManager.java @@ -18,14 +18,18 @@ package org.apache.tez.dag.app.dag.impl; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; +import static org.junit.Assert.fail; import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -56,8 +60,10 @@ import org.apache.tez.dag.app.dag.Vertex; import org.apache.tez.dag.app.dag.event.CallableEvent; import org.apache.tez.dag.app.dag.event.VertexEventInputDataInformation; +import org.apache.tez.dag.app.dag.event.VertexEventRouteEvent; import org.apache.tez.runtime.api.Event; import org.apache.tez.runtime.api.TaskAttemptIdentifier; +import org.apache.tez.runtime.api.events.CustomProcessorEvent; import org.apache.tez.runtime.api.events.InputDataInformationEvent; import org.apache.tez.runtime.api.events.VertexManagerEvent; import org.apache.tez.runtime.api.impl.GroupInputSpec; @@ -65,7 +71,6 @@ import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentCaptor; -import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -87,7 +92,7 @@ public void setup() { mockAppContext = mock(AppContext.class, RETURNS_DEEP_STUBS); execService = mock(ListeningExecutorService.class); final ListenableFuture mockFuture = mock(ListenableFuture.class); - Mockito.doAnswer(new Answer() { + doAnswer(new Answer() { public ListenableFuture answer(InvocationOnMock invocation) { Object[] args = invocation.getArguments(); CallableEvent e = (CallableEvent) args[0]; @@ -214,10 +219,9 @@ public void testOnRootVertexInitialized2() throws Exception { @Test(timeout = 5000) public void testVMPluginCtxGetInputVertexGroup() throws Exception { VertexManager vm = - new VertexManager( - VertexManagerPluginDescriptor.create(CustomVertexManager.class - .getName()), UserGroupInformation.getCurrentUser(), - mockVertex, mockAppContext, mock(StateChangeNotifier.class)); + new VertexManager(VertexManagerPluginDescriptor.create(CustomVertexManager.class.getName()), + UserGroupInformation.getCurrentUser(), mockVertex, mockAppContext, + mock(StateChangeNotifier.class)); assertTrue(vm.pluginContext.getInputVertexGroups().isEmpty()); @@ -232,6 +236,59 @@ public void testVMPluginCtxGetInputVertexGroup() throws Exception { assertTrue(groups.get(group).contains(v2)); } + @Test(timeout = 5000) + public void testSendCustomProcessorEvent() throws Exception { + VertexManager vm = + new VertexManager(VertexManagerPluginDescriptor.create(CustomVertexManager.class.getName()), + UserGroupInformation.getCurrentUser(), mockVertex, mockAppContext, + 
mock(StateChangeNotifier.class)); + ArgumentCaptor<VertexEventRouteEvent> requestCaptor = + ArgumentCaptor.forClass(VertexEventRouteEvent.class); + + when(mockVertex.getTotalTasks()).thenReturn(2); + + List<CustomProcessorEvent> events = new ArrayList<>(); + // task id too small, should fail + try { + vm.pluginContext.sendEventToProcessor(events, -1); + fail("Should fail for invalid task id"); + } catch (IllegalArgumentException exception) { + assertTrue(exception.getMessage().contains("Invalid taskId")); + } + // task id too large, should fail + try { + vm.pluginContext.sendEventToProcessor(events, 10); + fail("Should fail for invalid task id"); + } catch (IllegalArgumentException exception) { + assertTrue(exception.getMessage().contains("Invalid taskId")); + } + + // null event, do nothing + vm.pluginContext.sendEventToProcessor(null, 0); + verify(mockHandler, never()).handle(requestCaptor.capture()); + + // empty event + vm.pluginContext.sendEventToProcessor(events, 1); + verify(mockHandler, never()).handle(requestCaptor.capture()); + + // non-empty event list with a real payload + byte[] payload = new byte[] {1,2,3}; + events.add(CustomProcessorEvent.create(ByteBuffer.wrap(payload))); + vm.pluginContext.sendEventToProcessor(events, 1); + verify(mockHandler, times(1)).handle(requestCaptor.capture()); + CustomProcessorEvent cpe = + (CustomProcessorEvent)(requestCaptor.getValue().getEvents().get(0).getEvent()); + + // should be able to read the payload multiple times + for (int i = 0; i < 2; i++) { + ByteBuffer payloadBuffer = cpe.getPayload(); + assertEquals(payload.length, payloadBuffer.remaining()); + for (byte aPayload : payload) { + assertEquals(aPayload, payloadBuffer.get()); + } + } + } + public static class CustomVertexManager extends VertexManagerPlugin { private Map<String, List<Event>> cachedEventMap = new HashMap<String, List<Event>>(); diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherManager.java index b3568eb20f..f1f2478db3 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherManager.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherManager.java @@ -19,7 +19,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; @@ -27,6 +27,7 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import java.io.IOException; import java.net.UnknownHostException; @@ -47,6 +48,7 @@ import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.tez.common.TezUtils; +import org.apache.tez.common.counters.DAGCounter; import org.apache.tez.dag.api.NamedEntityDescriptor; import org.apache.tez.dag.api.TezConstants; import org.apache.tez.dag.api.TezException; @@ -75,6 +77,9 @@ public class TestContainerLauncherManager { + private static final String DAG_NAME = "dagName"; + private static final int DAG_INDEX = 1; + @Before @After public void resetTest() { @@ -249,14 +254,44 @@ public void testEventRouting() throws Exception { } } - @SuppressWarnings("unchecked") + + @SuppressWarnings("rawtypes") + @Test + public void testContainerLaunchCounter() throws TezException, InterruptedException, 
IOException { + AppContext appContext = mock(AppContext.class); + DAG dag = mock(DAG.class); + when(appContext.getCurrentDAG()).thenReturn(dag); + + EventHandler eventHandler = mock(EventHandler.class); + doReturn(eventHandler).when(appContext).getEventHandler(); + doReturn("testlauncher").when(appContext).getContainerLauncherName(0); + + NamedEntityDescriptor containerLauncherDescriptor = + new NamedEntityDescriptor<>("testlauncher", ContainerLauncherForTest.class.getName()); + List<NamedEntityDescriptor> descriptors = new LinkedList<>(); + descriptors.add(containerLauncherDescriptor); + + ContainerLauncherManager containerLauncherManager = + new ContainerLauncherManager(appContext, mock(TaskCommunicatorManagerInterface.class), "", descriptors, false); + + ContainerLaunchContext clc = mock(ContainerLaunchContext.class); + Container container = mock(Container.class); + ContainerLauncherLaunchRequestEvent launchRequestEvent = + new ContainerLauncherLaunchRequestEvent(clc, container, 0, 0, 0); + containerLauncherManager.handle(launchRequestEvent); + containerLauncherManager.close(); + + // ContainerLauncherForTest invokes the context callbacks properly, + // so the manager is expected to increment DAGCounter.TOTAL_CONTAINER_LAUNCH_COUNT + verify(dag).incrementDagCounter(DAGCounter.TOTAL_CONTAINER_LAUNCH_COUNT, 1); // launched + } + + @SuppressWarnings({ "unchecked", "rawtypes" }) @Test(timeout = 5000) public void testReportFailureFromContainerLauncher() throws ServicePluginException, TezException { - final String dagName = DAG_NAME; - final int dagIndex = DAG_INDEX; - TezDAGID dagId = TezDAGID.getInstance(ApplicationId.newInstance(0, 0), dagIndex); + TezDAGID dagId = TezDAGID.getInstance(ApplicationId.newInstance(0, 0), DAG_INDEX); DAG dag = mock(DAG.class); - doReturn(dagName).when(dag).getName(); + doReturn(DAG_NAME).when(dag).getName(); doReturn(dagId).when(dag).getID(); EventHandler eventHandler = mock(EventHandler.class); AppContext appContext = mock(AppContext.class); @@ -264,10 +299,10 @@ public void testReportFailureFromContainerLauncher() throws ServicePluginExcepti doReturn(dag).when(appContext).getCurrentDAG(); doReturn("testlauncher").when(appContext).getContainerLauncherName(0); - NamedEntityDescriptor taskCommDescriptor = - new NamedEntityDescriptor<>("testlauncher", ContainerLauncherForTest.class.getName()); + NamedEntityDescriptor containerLauncherDescriptor = + new NamedEntityDescriptor<>("testlauncher", FailureReporterContainerLauncher.class.getName()); List<NamedEntityDescriptor> list = new LinkedList<>(); - list.add(taskCommDescriptor); + list.add(containerLauncherDescriptor); ContainerLauncherManager containerLauncherManager = new ContainerLauncherManager(appContext, mock(TaskCommunicatorManagerInterface.class), "", list, false); @@ -340,7 +375,7 @@ public void testContainerLauncherUserError() throws ServicePluginException { // launch container doThrow(new RuntimeException("testexception")).when(containerLauncher) - .launchContainer(any(ContainerLaunchRequest.class)); + .launchContainer(any()); ContainerLaunchContext clc1 = mock(ContainerLaunchContext.class); Container container1 = mock(Container.class); ContainerLauncherLaunchRequestEvent launchRequestEvent = @@ -365,7 +400,7 @@ public void testContainerLauncherUserError() throws ServicePluginExcepti // stop container doThrow(new RuntimeException("teststopexception")).when(containerLauncher) - .stopContainer(any(ContainerStopRequest.class)); + .stopContainer(any()); ContainerId containerId2 = mock(ContainerId.class); NodeId nodeId2 = mock(NodeId.class); 
ContainerLauncherStopRequestEvent stopRequestEvent = @@ -514,26 +549,41 @@ public void stopContainer(ContainerStopRequest stopRequest) { } } - private static final String DAG_NAME = "dagName"; - private static final int DAG_INDEX = 1; - public static class ContainerLauncherForTest extends ContainerLauncher { + public static class FailureReporterContainerLauncher extends ContainerLauncher { - public ContainerLauncherForTest( - ContainerLauncherContext containerLauncherContext) { + public FailureReporterContainerLauncher(ContainerLauncherContext containerLauncherContext) { super(containerLauncherContext); } @Override - public void launchContainer(ContainerLaunchRequest launchRequest) throws - ServicePluginException { + public void launchContainer(ContainerLaunchRequest launchRequest) throws ServicePluginException { getContext().reportError(ServicePluginErrorDefaults.INCONSISTENT_STATE, "ReportedFatalError", null); } @Override public void stopContainer(ContainerStopRequest stopRequest) throws ServicePluginException { - getContext() - .reportError(ServicePluginErrorDefaults.SERVICE_UNAVAILABLE, "ReportError", new DagInfoImplForTest(DAG_INDEX, DAG_NAME)); + getContext().reportError(ServicePluginErrorDefaults.SERVICE_UNAVAILABLE, "ReportError", + new DagInfoImplForTest(DAG_INDEX, DAG_NAME)); } } + /** + * This container launcher simply implements ContainerLauncher methods with the proper context callbacks. + */ + public static class ContainerLauncherForTest extends ContainerLauncher { + + public ContainerLauncherForTest(ContainerLauncherContext containerLauncherContext) { + super(containerLauncherContext); + } + + @Override + public void launchContainer(ContainerLaunchRequest launchRequest) throws ServicePluginException { + getContext().containerLaunched(launchRequest.getContainerId()); + } + + @Override + public void stopContainer(ContainerStopRequest stopRequest) throws ServicePluginException { + getContext().containerStopRequested(stopRequest.getContainerId()); + } + } } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherWrapper.java b/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherWrapper.java index 8778f32d58..cb7d62dca0 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherWrapper.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestContainerLauncherWrapper.java @@ -24,7 +24,7 @@ public class TestContainerLauncherWrapper { @Test(timeout = 5000) public void testDelegation() throws Exception { PluginWrapperTestHelpers.testDelegation(ContainerLauncherWrapper.class, ContainerLauncher.class, - Sets.newHashSet("getContainerLauncher", "dagComplete")); + Sets.newHashSet("getContainerLauncher", "dagComplete", "vertexComplete", "taskAttemptFailed")); } } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestTezLocalCacheManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestTezLocalCacheManager.java new file mode 100644 index 0000000000..5596dc8feb --- /dev/null +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/launcher/TestTezLocalCacheManager.java @@ -0,0 +1,154 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.app.launcher; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.LocalResourceType; +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; +import org.apache.hadoop.yarn.api.records.URL; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.util.ConverterUtils; +import org.apache.tez.dag.api.TezConfiguration; +import org.junit.Assert; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; + +/** + * Test local cache manager. + */ +public class TestTezLocalCacheManager { + + @Test + public void testManager() throws URISyntaxException, IOException { + Map<String, LocalResource> resources = new HashMap<>(); + + // Test that localization works for regular files, and that if multiple symlinks are created, + // they all work + LocalResource resourceOne = createFile("content-one"); + LocalResource resourceTwo = createFile("content-two"); + + resources.put("file-one", resourceOne); + resources.put("file-two", resourceTwo); + resources.put("file-three", resourceTwo); + + // Not currently supported, but shouldn't throw an exception... 
+ resources.put("some-subdir/file-three", resourceTwo); + + TezLocalCacheManager manager = new TezLocalCacheManager(resources, new Configuration()); + + try { + manager.localize(); + + Assert.assertEquals( + "content-one", + new String(Files.readAllBytes(Paths.get("./file-one"))) + ); + + Assert.assertEquals( + "content-two", + new String(Files.readAllBytes(Paths.get("./file-two"))) + ); + + Assert.assertEquals( + "content-two", + new String(Files.readAllBytes(Paths.get("./file-three"))) + ); + } finally { + manager.cleanup(); + } + + // verify that symlinks were removed + Assert.assertFalse(Files.exists(Paths.get("./file-one"))); + Assert.assertFalse(Files.exists(Paths.get("./file-two"))); + Assert.assertFalse(Files.exists(Paths.get("./file-three"))); + } + + // create a temporary file with the given content and return a LocalResource + private static LocalResource createFile(String content) throws IOException { + FileContext fs = FileContext.getLocalFSFileContext(); + + java.nio.file.Path tempFile = Files.createTempFile("test-cache-manager", ".txt"); + File temp = tempFile.toFile(); + temp.deleteOnExit(); + Path p = new Path("file:///" + tempFile.toAbsolutePath().toString()); + + Files.write(tempFile, content.getBytes()); + + RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); + LocalResource ret = recordFactory.newRecordInstance(LocalResource.class); + URL yarnUrlFromPath = ConverterUtils.getYarnUrlFromPath(p); + ret.setResource(yarnUrlFromPath); + ret.setSize(content.getBytes().length); + ret.setType(LocalResourceType.FILE); + ret.setVisibility(LocalResourceVisibility.PRIVATE); + ret.setTimestamp(fs.getFileStatus(p).getModificationTime()); + return ret; + } + + @Test + public void testLocalizeRootDirectory() throws URISyntaxException, IOException { + // default directory + Map<String, LocalResource> resources = new HashMap<>(); + + LocalResource resourceOne = createFile("content-one"); + resources.put("file-one", resourceOne); + + TezLocalCacheManager manager = new TezLocalCacheManager(resources, new Configuration()); + + try { + Assert.assertFalse(Files.exists(Paths.get("./file-one"))); + manager.localize(); + Assert.assertTrue(Files.exists(Paths.get("./file-one"))); + + } finally { + manager.cleanup(); + Assert.assertFalse(Files.exists(Paths.get("./file-one"))); + } + + // configured directory + Configuration conf = new Configuration(); + conf.set(TezConfiguration.TEZ_LOCAL_CACHE_ROOT_FOLDER, "target"); + manager = new TezLocalCacheManager(resources, conf); + + try { + // files don't exist at all + Assert.assertFalse(Files.exists(Paths.get("./file-one"))); + Assert.assertFalse(Files.exists(Paths.get("./target/file-one"))); + manager.localize(); + // file appears only at configured location + Assert.assertFalse(Files.exists(Paths.get("./file-one"))); + Assert.assertTrue(Files.exists(Paths.get("./target/file-one"))); + + } finally { + manager.cleanup(); + Assert.assertFalse(Files.exists(Paths.get("./target/file-one"))); + } + } +} diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java index 7e9e9abbb6..1fc418f8c2 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestContainerReuse.java @@ -21,10 +21,10 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertEquals; -import static org.mockito.Matchers.any; -import static 
org.mockito.Matchers.eq; -import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; @@ -38,7 +38,9 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.tez.common.TezUtils; +import org.apache.tez.common.counters.DAGCounter; import org.apache.tez.serviceplugins.api.TaskScheduler; import org.mockito.ArgumentCaptor; import org.slf4j.Logger; @@ -54,7 +56,6 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.client.api.AMRMClient; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.tez.common.MockDNSToSwitchMapping; import org.apache.tez.dag.api.InputDescriptor; import org.apache.tez.dag.api.OutputDescriptor; @@ -69,6 +70,7 @@ import org.apache.tez.dag.app.ContainerHeartbeatHandler; import org.apache.tez.dag.app.DAGAppMasterState; import org.apache.tez.dag.app.TaskCommunicatorManagerInterface; +import org.apache.tez.dag.app.dag.DAG; import org.apache.tez.dag.app.dag.TaskAttempt; import org.apache.tez.dag.app.rm.YarnTaskSchedulerService.CookieContainerRequest; import org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AMRMClientAsyncForTest; @@ -113,7 +115,7 @@ public static void setup() { public void testDelayedReuseContainerBecomesAvailable() throws IOException, InterruptedException, ExecutionException { LOG.info("Test testDelayedReuseContainerBecomesAvailable"); - Configuration conf = new Configuration(new YarnConfiguration()); + Configuration conf = new Configuration(); conf.setBoolean( TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); conf.setBoolean( @@ -126,6 +128,7 @@ public void testDelayedReuseContainerBecomesAvailable() conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0); CapturingEventHandler eventHandler = new CapturingEventHandler(); + DAG dag = mock(DAG.class); TezDAGID dagID = TezDAGID.getInstance("0", 0, 0); TezVertexID vertexID = TezVertexID.getInstance(dagID, 1); @@ -143,6 +146,7 @@ public void testDelayedReuseContainerBecomesAvailable() doReturn(amContainerMap).when(appContext).getAllContainers(); doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState(); doReturn(amNodeTracker).when(appContext).getNodeTracker(); + doReturn(dag).when(appContext).getCurrentDAG(); doReturn(dagID).when(appContext).getCurrentDAGID(); doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo(); @@ -203,9 +207,9 @@ public void testDelayedReuseContainerBecomesAvailable() TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); verify(taskSchedulerManager).taskAllocated( - eq(0), eq(ta11), any(Object.class), eq(containerHost1)); + eq(0), eq(ta11), any(), eq(containerHost1)); verify(taskSchedulerManager).taskAllocated( - eq(0), eq(ta21), any(Object.class), eq(containerHost2)); + eq(0), eq(ta21), any(), eq(containerHost2)); // Adding the event later so that task1 assigned to containerHost1 // is deterministic. 
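The matcher changes running through this file are one mechanical migration: org.mockito.Matchers is deprecated (and removed in Mockito 3), so the static imports move to org.mockito.Mockito, and any(SomeType.class) collapses to a plain any() wherever the parameter type can be inferred. A minimal standalone sketch of the before/after; the Sink interface here is a hypothetical stand-in for taskSchedulerManager.taskAllocated(...), not a Tez type:

import static org.mockito.Mockito.any;
import static org.mockito.Mockito.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

public class MatcherMigrationSketch {
  // hypothetical collaborator standing in for the scheduler callback
  interface Sink {
    void accept(int schedulerId, Object task, String host);
  }

  public static void main(String[] args) {
    Sink sink = mock(Sink.class);
    sink.accept(0, new Object(), "host1");
    // before: verify(sink).accept(eq(0), any(Object.class), eq("host1"));
    // after: the matcher's type is inferred from the method signature
    verify(sink).accept(eq(0), any(), eq("host1"));
  }
}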
@@ -217,9 +221,9 @@ public void testDelayedReuseContainerBecomesAvailable() drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta11, true, null, null); verify(taskSchedulerManager, times(1)).taskAllocated( - eq(0), eq(ta31), any(Object.class), eq(containerHost1)); + eq(0), eq(ta31), any(), eq(containerHost1)); verify(rmClient, times(0)).releaseAssignedContainer( - eq(containerHost1.getId())); + eq(containerHost1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); eventHandler.reset(); @@ -248,7 +252,7 @@ public void testDelayedReuseContainerBecomesAvailable() public void testDelayedReuseContainerNotAvailable() throws IOException, InterruptedException, ExecutionException { LOG.info("Test testDelayedReuseContainerNotAvailable"); - Configuration conf = new Configuration(new YarnConfiguration()); + Configuration conf = new Configuration(); conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, false); conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, false); @@ -257,6 +261,7 @@ public void testDelayedReuseContainerNotAvailable() conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0); CapturingEventHandler eventHandler = new CapturingEventHandler(); + DAG dag = mock(DAG.class); TezDAGID dagID = TezDAGID.getInstance("0", 0, 0); TezVertexID vertexID = TezVertexID.getInstance(dagID, 1); @@ -274,6 +279,7 @@ public void testDelayedReuseContainerNotAvailable() doReturn(amContainerMap).when(appContext).getAllContainers(); doReturn(amNodeTracker).when(appContext).getNodeTracker(); doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState(); + doReturn(dag).when(appContext).getCurrentDAG(); doReturn(dagID).when(appContext).getCurrentDAGID(); doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo(); @@ -325,8 +331,8 @@ public void testDelayedReuseContainerNotAvailable() taskScheduler.onContainersAllocated(Lists.newArrayList(containerHost1, containerHost2)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(Object.class), eq(containerHost1)); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta21), any(Object.class), + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(), eq(containerHost1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta21), any(), eq(containerHost2)); // Adding the event later so that task1 assigned to containerHost1 is deterministic. 
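Each reuse test now mocks a DAG and stubs AppContext#getCurrentDAG() before starting the scheduler. The verifications added further down show why: on container allocation, reuse, and release the scheduler looks up the current DAG and bumps a DAGCounter, so an unstubbed getCurrentDAG() would hand that path a null. A hedged sketch of the pattern, using hypothetical stand-in interfaces rather than the real Tez types (whose counter argument is the DAGCounter enum, not a String):

import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

public class CurrentDagStubSketch {
  interface Dag { void incrementDagCounter(String counter, int delta); }
  interface Ctx { Dag getCurrentDAG(); }

  public static void main(String[] args) {
    Ctx appContext = mock(Ctx.class);
    Dag dag = mock(Dag.class);
    doReturn(dag).when(appContext).getCurrentDAG(); // without this stub the counter path sees null

    // roughly what the scheduler side does when a fresh container arrives:
    appContext.getCurrentDAG().incrementDagCounter("TOTAL_CONTAINER_ALLOCATION_COUNT", 1);

    verify(dag).incrementDagCounter("TOTAL_CONTAINER_ALLOCATION_COUNT", 1);
  }
}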
@@ -338,7 +344,7 @@ public void testDelayedReuseContainerNotAvailable() drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta21, true, null, null); verify(taskSchedulerManager, times(0)).taskAllocated( - eq(0), eq(ta31), any(Object.class), eq(containerHost2)); + eq(0), eq(ta31), any(), eq(containerHost2)); verify(rmClient, times(1)).releaseAssignedContainer( eq(containerHost2.getId())); eventHandler.verifyInvocation(AMContainerEventStopRequest.class); @@ -350,7 +356,7 @@ public void testDelayedReuseContainerNotAvailable() @Test(timeout = 10000l) public void testSimpleReuse() throws IOException, InterruptedException, ExecutionException { LOG.info("Test testSimpleReuse"); - Configuration tezConf = new Configuration(new YarnConfiguration()); + Configuration tezConf = new Configuration(); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0); @@ -358,6 +364,7 @@ public void testSimpleReuse() throws IOException, InterruptedException, Executio tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0); CapturingEventHandler eventHandler = new CapturingEventHandler(); + DAG dag = mock(DAG.class); TezDAGID dagID = TezDAGID.getInstance("0", 0, 0); AMRMClient rmClientCore = new AMRMClientForTest(); @@ -371,17 +378,18 @@ public void testSimpleReuse() throws IOException, InterruptedException, Executio doReturn(amContainerMap).when(appContext).getAllContainers(); doReturn(amNodeTracker).when(appContext).getNodeTracker(); doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState(); + doReturn(dag).when(appContext).getCurrentDAG(); doReturn(dagID).when(appContext).getCurrentDAGID(); doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo(); - TaskSchedulerManager - taskSchedulerManagerReal = new TaskSchedulerManagerForTest(appContext, eventHandler, rmClient, new AlwaysMatchesContainerMatcher(), TezUtils.createUserPayloadFromConf(tezConf)); + TaskSchedulerManager taskSchedulerManagerReal = new TaskSchedulerManagerForTest(appContext, eventHandler, rmClient, + new AlwaysMatchesContainerMatcher(), TezUtils.createUserPayloadFromConf(tezConf)); TaskSchedulerManager taskSchedulerManager = spy(taskSchedulerManagerReal); taskSchedulerManager.init(tezConf); taskSchedulerManager.start(); - TaskSchedulerWithDrainableContext taskScheduler = (TaskSchedulerWithDrainableContext) ((TaskSchedulerManagerForTest) taskSchedulerManager) - .getSpyTaskScheduler(); + TaskSchedulerWithDrainableContext taskScheduler = + (TaskSchedulerWithDrainableContext) ((TaskSchedulerManagerForTest) taskSchedulerManager).getSpyTaskScheduler(); TaskSchedulerContextDrainable drainableAppCallback = taskScheduler.getDrainableAppCallback(); AtomicBoolean drainNotifier = new AtomicBoolean(false); taskScheduler.delayedContainerManager.drainedDelayedContainersForTest = drainNotifier; @@ -426,8 +434,9 @@ public void testSimpleReuse() throws IOException, InterruptedException, Executio taskScheduler.onContainersAllocated(Collections.singletonList(container1)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(Object.class), + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(), eq(container1)); + verify(dag).incrementDagCounter(DAGCounter.TOTAL_CONTAINER_ALLOCATION_COUNT, 
1); // allocated // Task assigned to container completed successfully. Container should be re-used. taskSchedulerManager.handleEvent( @@ -435,9 +444,10 @@ public void testSimpleReuse() throws IOException, InterruptedException, Executio null, 0)); drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta11, true, null, null); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta12), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta12), any(), eq(container1)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); + verify(dag).incrementDagCounter(DAGCounter.TOTAL_CONTAINER_REUSE_COUNT, 1); // reused eventHandler.reset(); // Task assigned to container completed successfully. @@ -447,10 +457,11 @@ public void testSimpleReuse() throws IOException, InterruptedException, Executio null, 0)); drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta12, true, null, null); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(Object.class), + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(), eq(container1)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); + verify(dag, times(2)).incrementDagCounter(DAGCounter.TOTAL_CONTAINER_REUSE_COUNT, 1); // reused again eventHandler.reset(); // Verify no re-use if a previous task fails. @@ -458,11 +469,12 @@ public void testSimpleReuse() throws IOException, InterruptedException, Executio new AMSchedulerEventTAEnded(ta13, container1.getId(), TaskAttemptState.FAILED, null, "TIMEOUT", 0)); drainableAppCallback.drain(); - verify(taskSchedulerManager, times(0)).taskAllocated(eq(0), eq(ta14), any(Object.class), + verify(taskSchedulerManager, times(0)).taskAllocated(eq(0), eq(ta14), any(), eq(container1)); verifyDeAllocateTask(taskScheduler, ta13, false, null, "TIMEOUT"); verify(rmClient).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyInvocation(AMContainerEventStopRequest.class); + verify(dag).incrementDagCounter(DAGCounter.TOTAL_CONTAINER_RELEASE_COUNT, 1); // released eventHandler.reset(); Container container2 = createContainer(2, "host2", resource1, priority1); @@ -471,7 +483,7 @@ public void testSimpleReuse() throws IOException, InterruptedException, Executio taskScheduler.onContainersAllocated(Collections.singletonList(container2)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta14), any(Object.class), + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta14), any(), eq(container2)); // Task assigned to container completed successfully. No pending requests. Container should be released. 
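One subtlety in the counter assertions above: Mockito's verify(mock, times(n)) counts all matching invocations since the mock was created, not since the previous verify. That is why the second reuse is asserted with times(2) against the very same incrementDagCounter(TOTAL_CONTAINER_REUSE_COUNT, 1) call. A self-contained illustration:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

import java.util.List;

public class CumulativeVerifySketch {
  @SuppressWarnings("unchecked")
  public static void main(String[] args) {
    List<String> counter = (List<String>) mock(List.class);

    counter.add("reuse");
    verify(counter, times(1)).add("reuse"); // one matching call so far

    counter.add("reuse");
    verify(counter, times(2)).add("reuse"); // cumulative total, not "one more since last verify"
  }
}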
@@ -482,16 +494,19 @@ public void testSimpleReuse() throws IOException, InterruptedException, Executio verifyDeAllocateTask(taskScheduler, ta14, true, null, null); verify(rmClient).releaseAssignedContainer(eq(container2.getId())); eventHandler.verifyInvocation(AMContainerEventStopRequest.class); + verify(dag, times(2)).incrementDagCounter(DAGCounter.TOTAL_CONTAINER_ALLOCATION_COUNT, 1); // new allocation + verify(dag, times(2)).incrementDagCounter(DAGCounter.TOTAL_CONTAINER_RELEASE_COUNT, 1); // then released again eventHandler.reset(); taskScheduler.shutdown(); taskSchedulerManager.close(); + dag.onFinish(); } @Test(timeout = 10000l) public void testReuseWithTaskSpecificLaunchCmdOption() throws IOException, InterruptedException, ExecutionException { LOG.info("Test testReuseWithTaskSpecificLaunchCmdOption"); - Configuration tezConf = new Configuration(new YarnConfiguration()); + Configuration tezConf = new Configuration(); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0); @@ -503,6 +518,7 @@ public void testReuseWithTaskSpecificLaunchCmdOption() throws IOException, Inter TaskSpecificLaunchCmdOption taskSpecificLaunchCmdOption = new TaskSpecificLaunchCmdOption(tezConf); CapturingEventHandler eventHandler = new CapturingEventHandler(); + DAG dag = mock(DAG.class); TezDAGID dagID = TezDAGID.getInstance("0", 0, 0); AMRMClient rmClientCore = new AMRMClientForTest(); @@ -516,6 +532,7 @@ public void testReuseWithTaskSpecificLaunchCmdOption() throws IOException, Inter doReturn(amContainerMap).when(appContext).getAllContainers(); doReturn(amNodeTracker).when(appContext).getNodeTracker(); doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState(); + doReturn(dag).when(appContext).getCurrentDAG(); doReturn(dagID).when(appContext).getCurrentDAGID(); doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo(); @@ -571,7 +588,7 @@ public void testReuseWithTaskSpecificLaunchCmdOption() throws IOException, Inter taskScheduler.onContainersAllocated(Collections.singletonList(container1)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(Object.class), + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(), eq(container1)); // First task had profiling on. This container can not be reused further. 
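The test that follows exercises TaskSpecificLaunchCmdOption: once a task runs with task-specific JVM options (profiling agents, for instance), the container was launched with a command line other tasks do not expect, so the scheduler releases it rather than reusing it; the assertions below show Tez is stricter still, releasing the container even when the next task carries identical profiling options. A toy sketch of the underlying idea, with made-up option strings (this equality check is illustrative, not the Tez matching code):

public class LaunchOptsReuseSketch {
  // illustrative rule: a container is only shareable if the next task
  // expects the same JVM launch options it was started with
  static boolean reusable(String containerLaunchOpts, String nextTaskOpts) {
    return containerLaunchOpts.equals(nextTaskOpts);
  }

  public static void main(String[] args) {
    String profiled = "-Xmx512m -agentpath:/opt/profiler/libagent.so"; // hypothetical agent path
    String plain = "-Xmx512m";
    System.out.println(reusable(profiled, plain)); // false -> release the container
  }
}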
@@ -580,7 +597,7 @@ public void testReuseWithTaskSpecificLaunchCmdOption() throws IOException, Inter null, 0)); drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta11, true, null, null); - verify(taskSchedulerManager, times(0)).taskAllocated(eq(0), eq(ta12), any(Object.class), + verify(taskSchedulerManager, times(0)).taskAllocated(eq(0), eq(ta12), any(), eq(container1)); verify(rmClient, times(1)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyInvocation(AMContainerEventStopRequest.class); @@ -614,7 +631,7 @@ public void testReuseWithTaskSpecificLaunchCmdOption() throws IOException, Inter taskScheduler.onContainersAllocated(Collections.singletonList(container2)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(Object.class), eq(container2)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(), eq(container2)); // Verify that the container cannot be reused when the profiling option is turned on. // Even two tasks with the same profiling option do not share a container. @@ -623,7 +640,7 @@ public void testReuseWithTaskSpecificLaunchCmdOption() throws IOException, Inter null, 0)); drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta13, true, null, null); - verify(taskSchedulerManager, times(0)).taskAllocated(eq(0), eq(ta14), any(Object.class), + verify(taskSchedulerManager, times(0)).taskAllocated(eq(0), eq(ta14), any(), eq(container2)); verify(rmClient, times(1)).releaseAssignedContainer(eq(container2.getId())); eventHandler.verifyInvocation(AMContainerEventStopRequest.class); @@ -661,7 +678,7 @@ public void testReuseWithTaskSpecificLaunchCmdOption() throws IOException, Inter taskScheduler.onContainersAllocated(Collections.singletonList(container3)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta15), any(Object.class), + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta15), any(), eq(container3)); //Ensure task 6 (of vertex 1) is allocated to same container @@ -670,7 +687,7 @@ public void testReuseWithTaskSpecificLaunchCmdOption() throws IOException, Inter null, 0)); drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta15, true, null, null); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta16), any(Object.class), eq(container3)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta16), any(), eq(container3)); eventHandler.reset(); taskScheduler.shutdown(); @@ -681,7 +698,7 @@ public void testReuseWithTaskSpecificLaunchCmdOption() throws IOException, Inter public void testReuseNonLocalRequest() throws IOException, InterruptedException, ExecutionException { LOG.info("Test testReuseNonLocalRequest"); - Configuration tezConf = new Configuration(new YarnConfiguration()); + Configuration tezConf = new Configuration(); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, true); @@ -690,6 +707,7 @@ public void testReuseNonLocalRequest() tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 1000l); CapturingEventHandler eventHandler = new CapturingEventHandler(); + DAG dag = mock(DAG.class); TezDAGID dagID = TezDAGID.getInstance("0", 0, 0); AMRMClient 
rmClientCore = new AMRMClientForTest(); @@ -706,6 +724,7 @@ public void testReuseNonLocalRequest() doReturn(amContainerMap).when(appContext).getAllContainers(); doReturn(amNodeTracker).when(appContext).getNodeTracker(); doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState(); + doReturn(dag).when(appContext).getCurrentDAG(); doReturn(dagID).when(appContext).getCurrentDAGID(); doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo(); @@ -759,7 +778,7 @@ public void testReuseNonLocalRequest() TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); verify(taskSchedulerManager).taskAllocated( - eq(0), eq(ta11), any(Object.class), eq(container1)); + eq(0), eq(ta11), any(), eq(container1)); // Send launch request for task2 (vertex2) taskSchedulerManager.handleEvent(lrEvent12); @@ -773,7 +792,7 @@ public void testReuseNonLocalRequest() drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta11, true, null, null); verify(taskSchedulerManager, times(0)).taskAllocated( - eq(0), eq(ta12), any(Object.class), eq(container1)); + eq(0), eq(ta12), any(), eq(container1)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); eventHandler.reset(); @@ -781,7 +800,7 @@ public void testReuseNonLocalRequest() TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); verify(taskSchedulerManager).taskAllocated( - eq(0), eq(ta12), any(Object.class), eq(container1)); + eq(0), eq(ta12), any(), eq(container1)); // TA12 completed. taskSchedulerManager.handleEvent( @@ -802,7 +821,7 @@ public void testReuseNonLocalRequest() public void testReuseAcrossVertices() throws IOException, InterruptedException, ExecutionException { LOG.info("Test testReuseAcrossVertices"); - Configuration tezConf = new Configuration(new YarnConfiguration()); + Configuration tezConf = new Configuration(); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); tezConf.setLong( TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 1l); @@ -814,6 +833,7 @@ public void testReuseAcrossVertices() TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS, 1); CapturingEventHandler eventHandler = new CapturingEventHandler(); + DAG dag = mock(DAG.class); TezDAGID dagID = TezDAGID.getInstance("0", 0, 0); AMRMClient rmClientCore = new AMRMClientForTest(); @@ -831,6 +851,7 @@ public void testReuseAcrossVertices() doReturn(amNodeTracker).when(appContext).getNodeTracker(); doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState(); doReturn(true).when(appContext).isSession(); + doReturn(dag).when(appContext).getCurrentDAG(); doReturn(dagID).when(appContext).getCurrentDAGID(); doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo(); @@ -887,7 +908,7 @@ public void testReuseAcrossVertices() TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); verify(taskSchedulerManager).taskAllocated( - eq(0), eq(ta11), any(Object.class), eq(container1)); + eq(0), eq(ta11), any(), eq(container1)); // Send launch request for task2 (vertex2) taskSchedulerManager.handleEvent(lrEvent21); @@ -900,7 +921,7 @@ public void testReuseAcrossVertices() drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta11, true, null, null); verify(taskSchedulerManager).taskAllocated( - eq(0), eq(ta21), any(Object.class), eq(container1)); + eq(0), eq(ta21), any(), eq(container1)); 
verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); // Task 2 completes. @@ -922,7 +943,7 @@ public void testReuseAcrossVertices() @Test(timeout = 30000l) public void testReuseLocalResourcesChanged() throws IOException, InterruptedException, ExecutionException { LOG.info("Test testReuseLocalResourcesChanged"); - Configuration tezConf = new Configuration(new YarnConfiguration()); + Configuration tezConf = new Configuration(); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, true); @@ -931,6 +952,7 @@ public void testReuseLocalResourcesChanged() throws IOException, InterruptedExce CapturingEventHandler eventHandler = new CapturingEventHandler(); TezDAGID dagID1 = TezDAGID.getInstance("0", 1, 0); + DAG dag1 = mock(DAG.class); AMRMClient rmClientCore = new AMRMClientForTest(); TezAMRMClientAsync rmClient = spy(new AMRMClientAsyncForTest(rmClientCore, 100)); @@ -943,6 +965,7 @@ public void testReuseLocalResourcesChanged() throws IOException, InterruptedExce AMNodeTracker amNodeTracker = new AMNodeTracker(eventHandler, appContext); doReturn(amContainerMap).when(appContext).getAllContainers(); doReturn(amNodeTracker).when(appContext).getNodeTracker(); + doReturn(dag1).when(appContext).getCurrentDAG(); doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState(); doReturn(true).when(appContext).isSession(); doAnswer(dagIDAnswer).when(appContext).getCurrentDAGID(); @@ -999,7 +1022,7 @@ public void testReuseLocalResourcesChanged() throws IOException, InterruptedExce taskScheduler.onContainersAllocated(Collections.singletonList(container1)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta111), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta111), any(), eq(container1)); assignEvent = (AMContainerEventAssignTA) eventHandler.verifyInvocation(AMContainerEventAssignTA.class); assertEquals(1, assignEvent.getRemoteTaskLocalResources().size()); @@ -1009,7 +1032,7 @@ public void testReuseLocalResourcesChanged() throws IOException, InterruptedExce null, 0)); drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta111, true, null, null); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta112), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta112), any(), eq(container1)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); assignEvent = (AMContainerEventAssignTA) eventHandler.verifyInvocation(AMContainerEventAssignTA.class); @@ -1052,7 +1075,7 @@ public void testReuseLocalResourcesChanged() throws IOException, InterruptedExce TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta211), any(Object.class), + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta211), any(), eq(container1)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); @@ -1065,7 +1088,7 @@ public void testReuseLocalResourcesChanged() throws IOException, InterruptedExce null, 0)); drainableAppCallback.drain(); 
verifyDeAllocateTask(taskScheduler, ta211, true, null, null); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta212), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta212), any(), eq(container1)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); assignEvent = (AMContainerEventAssignTA) eventHandler.verifyInvocation(AMContainerEventAssignTA.class); @@ -1079,7 +1102,7 @@ public void testReuseLocalResourcesChanged() throws IOException, InterruptedExce @Test(timeout = 30000l) public void testReuseConflictLocalResources() throws IOException, InterruptedException, ExecutionException { LOG.info("Test testReuseLocalResourcesChanged"); - Configuration tezConf = new Configuration(new YarnConfiguration()); + Configuration tezConf = new Configuration(); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, true); @@ -1088,6 +1111,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc CapturingEventHandler eventHandler = new CapturingEventHandler(); TezDAGID dagID1 = TezDAGID.getInstance("0", 1, 0); + DAG dag1 = mock(DAG.class); AMRMClient rmClientCore = new AMRMClientForTest(); TezAMRMClientAsync rmClient = spy(new AMRMClientAsyncForTest(rmClientCore, 100)); @@ -1100,6 +1124,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc AMNodeTracker amNodeTracker = new AMNodeTracker(eventHandler, appContext); doReturn(amContainerMap).when(appContext).getAllContainers(); doReturn(amNodeTracker).when(appContext).getNodeTracker(); + doReturn(dag1).when(appContext).getCurrentDAG(); doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState(); doReturn(true).when(appContext).isSession(); doAnswer(dagIDAnswer).when(appContext).getCurrentDAGID(); @@ -1112,8 +1137,8 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc taskSchedulerManager.init(tezConf); taskSchedulerManager.start(); - TaskSchedulerWithDrainableContext taskScheduler = (TaskSchedulerWithDrainableContext) ((TaskSchedulerManagerForTest) taskSchedulerManager) - .getSpyTaskScheduler(); + TaskSchedulerWithDrainableContext taskScheduler = + (TaskSchedulerWithDrainableContext) ((TaskSchedulerManagerForTest) taskSchedulerManager).getSpyTaskScheduler(); TaskSchedulerContextDrainable drainableAppCallback = taskScheduler.getDrainableAppCallback(); AtomicBoolean drainNotifier = new AtomicBoolean(false); taskScheduler.delayedContainerManager.drainedDelayedContainersForTest = drainNotifier; @@ -1127,8 +1152,11 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc String rsrc1 = "rsrc1"; String rsrc2 = "rsrc2"; LocalResource lr1 = mock(LocalResource.class); + doReturn(LocalResourceType.FILE).when(lr1).getType(); LocalResource lr2 = mock(LocalResource.class); + doReturn(LocalResourceType.FILE).when(lr2).getType(); LocalResource lr3 = mock(LocalResource.class); + doReturn(LocalResourceType.FILE).when(lr3).getType(); AMContainerEventAssignTA assignEvent = null; @@ -1140,7 +1168,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc //Vertex 1, Task 1, Attempt 1, host1, lr1 TezTaskAttemptID taID111 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID11, 1), 1); 
TaskAttempt ta111 = mock(TaskAttempt.class); - doReturn(taID111).when(ta111).getID(); + doReturn(taID111).when(ta111).getTaskAttemptID(); doReturn("Mock for TA " + taID111.toString()).when(ta111).toString(); AMSchedulerEventTALaunchRequest lrEvent11 = createLaunchRequestEvent( taID111, ta111, resource1, host1, racks, priority1, v11LR); @@ -1152,7 +1180,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc //Vertex 1, Task 2, Attempt 1, host1, lr1 TezTaskAttemptID taID112 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID11, 2), 1); TaskAttempt ta112 = mock(TaskAttempt.class); - doReturn(taID112).when(ta112).getID(); + doReturn(taID112).when(ta112).getTaskAttemptID(); doReturn("Mock for TA " + taID112.toString()).when(ta112).toString(); AMSchedulerEventTALaunchRequest lrEvent12 = createLaunchRequestEvent( taID112, ta112, resource1, host1, racks, priority1, v12LR); @@ -1160,14 +1188,14 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc //Vertex 1, Task 3, Attempt 1, host1 TezTaskAttemptID taID113 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID11, 3), 1); TaskAttempt ta113 = mock(TaskAttempt.class); - doReturn(taID113).when(ta113).getID(); + doReturn(taID113).when(ta113).getTaskAttemptID(); doReturn("Mock for TA " + taID113.toString()).when(ta113).toString(); AMSchedulerEventTALaunchRequest lrEvent13 = createLaunchRequestEvent( taID113, ta113, resource1, host1, racks, priority1, new HashMap()); //Vertex 1, Task 4, Attempt 1, host1 TezTaskAttemptID taID114 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID11, 4), 1); TaskAttempt ta114 = mock(TaskAttempt.class); - doReturn(taID114).when(ta114).getID(); + doReturn(taID114).when(ta114).getTaskAttemptID(); doReturn("Mock for TA " + taID114.toString()).when(ta114).toString(); AMSchedulerEventTALaunchRequest lrEvent14 = createLaunchRequestEvent( taID114, ta114, resource1, host1, racks, priority1, new HashMap()); @@ -1183,7 +1211,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc taskScheduler.onContainersAllocated(Collections.singletonList(container1)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta111), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta111), any(), eq(container1)); assignEvent = (AMContainerEventAssignTA) eventHandler.verifyInvocation(AMContainerEventAssignTA.class); assertEquals(1, assignEvent.getRemoteTaskLocalResources().size()); @@ -1193,7 +1221,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc null, 0)); drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta111, true, null, null); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta112), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta112), any(), eq(container1)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); assignEvent = (AMContainerEventAssignTA) eventHandler.verifyInvocation(AMContainerEventAssignTA.class); @@ -1214,7 +1242,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc taskSchedulerManager.handleEvent(lrEvent13); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - 
verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta113), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta113), any(), eq(container1)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); eventHandler.reset(); @@ -1231,7 +1259,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc taskSchedulerManager.handleEvent(lrEvent14); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta114), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta114), any(), eq(container1)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); eventHandler.reset(); @@ -1258,7 +1286,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc //Vertex 2, Task 1, Attempt 1, host1, lr2 TezTaskAttemptID taID211 = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexID21, 1), 1); TaskAttempt ta211 = mock(TaskAttempt.class); - doReturn(taID211).when(ta211).getID(); + doReturn(taID211).when(ta211).getTaskAttemptID(); doReturn("Mock for TA " + taID211.toString()).when(ta211).toString(); AMSchedulerEventTALaunchRequest lrEvent21 = createLaunchRequestEvent(taID211, ta211, resource1, host1, racks, priority1, v21LR); @@ -1273,7 +1301,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc taskScheduler.onContainersAllocated(Collections.singletonList(container2)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta211), any(Object.class), eq(container2)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta211), any(), eq(container2)); eventHandler.reset(); taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta211, container2.getId(), @@ -1294,7 +1322,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc TezTaskAttemptID taID311 = TezTaskAttemptID.getInstance( TezTaskID.getInstance(TezVertexID.getInstance(dagID3, 1), 1), 1); TaskAttempt ta311 = mock(TaskAttempt.class); - doReturn(taID311).when(ta311).getID(); + doReturn(taID311).when(ta311).getTaskAttemptID(); doReturn("Mock for TA " + taID311).when(ta311).toString(); AMSchedulerEventTALaunchRequest lrEvent31 = createLaunchRequestEvent(taID311, ta311, resource1, host1, racks, priority1, v31LR); @@ -1302,7 +1330,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc taskSchedulerManager.handleEvent(lrEvent31); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta311), any(Object.class), eq(container2)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta311), any(), eq(container2)); eventHandler.reset(); taskScheduler.shutdown(); @@ -1313,7 +1341,7 @@ public void testReuseConflictLocalResources() throws IOException, InterruptedExc public void testAssignmentOnShutdown() throws IOException, InterruptedException, ExecutionException { LOG.info("Test testAssignmentOnShutdown"); - Configuration tezConf = new Configuration(new YarnConfiguration()); + Configuration tezConf = new Configuration(); 
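    // Note on this setup: container reuse is off and the AM context is stubbed
    // into a completed state further down, so the container allocated at the end
    // of this test must not be assigned (the times(0) verification on taskAllocated).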
tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, false); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0); @@ -1321,6 +1349,7 @@ public void testAssignmentOnShutdown() tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0); CapturingEventHandler eventHandler = new CapturingEventHandler(); + DAG dag = mock(DAG.class); TezDAGID dagID = TezDAGID.getInstance("0", 0, 0); AMRMClient rmClientCore = new AMRMClientForTest(); @@ -1335,6 +1364,7 @@ public void testAssignmentOnShutdown() doReturn(amNodeTracker).when(appContext).getNodeTracker(); doReturn(DAGAppMasterState.SUCCEEDED).when(appContext).getAMState(); doReturn(true).when(appContext).isAMInCompletionState(); + doReturn(dag).when(appContext).getCurrentDAG(); doReturn(dagID).when(appContext).getCurrentDAGID(); doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo(); @@ -1373,14 +1403,14 @@ public void testAssignmentOnShutdown() taskScheduler.onContainersAllocated(Collections.singletonList(container1)); drainableAppCallback.drain(); verify(taskSchedulerManager, times(0)).taskAllocated(eq(0), eq(ta11), - any(Object.class), eq(container1)); + any(), eq(container1)); taskScheduler.shutdown(); taskSchedulerManager.close(); } @Test(timeout=5000) public void testDifferentResourceContainerReuse() throws Exception { - Configuration tezConf = new Configuration(new YarnConfiguration()); + Configuration tezConf = new Configuration(); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0); @@ -1388,6 +1418,7 @@ public void testDifferentResourceContainerReuse() throws Exception { tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0); CapturingEventHandler eventHandler = new CapturingEventHandler(); + DAG dag = mock(DAG.class); TezDAGID dagID = TezDAGID.getInstance("0", 0, 0); AMRMClient rmClientCore = new AMRMClientForTest(); @@ -1402,6 +1433,7 @@ public void testDifferentResourceContainerReuse() throws Exception { doReturn(amContainerMap).when(appContext).getAllContainers(); doReturn(amNodeTracker).when(appContext).getNodeTracker(); doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState(); + doReturn(dag).when(appContext).getCurrentDAG(); doReturn(dagID).when(appContext).getCurrentDAGID(); doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo(); @@ -1464,20 +1496,20 @@ eventHandler, rmClient, new AlwaysMatchesContainerMatcher(), taskScheduler.onContainersAllocated(Collections.singletonList(container1)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(), eq(container1)); // Second container allocated, should start ta13 taskScheduler.onContainersAllocated(Collections.singletonList(container2)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(Object.class), eq(container2)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(), eq(container2)); // ta11 finished, should start ta12 
taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0)); drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta11, true, null, null); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta12), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta12), any(), eq(container1)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); eventHandler.reset(); @@ -1487,7 +1519,7 @@ eventHandler, rmClient, new AlwaysMatchesContainerMatcher(), TaskAttemptState.SUCCEEDED, null, null, 0)); drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta13, true, null, null); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta14), any(Object.class), eq(container2)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta14), any(), eq(container2)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container2.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); eventHandler.reset(); @@ -1516,7 +1548,7 @@ eventHandler, rmClient, new AlwaysMatchesContainerMatcher(), @Test(timeout=5000) public void testEnvironmentVarsContainerReuse() throws Exception { - Configuration tezConf = new Configuration(new YarnConfiguration()); + Configuration tezConf = new Configuration(); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0); @@ -1524,6 +1556,7 @@ public void testEnvironmentVarsContainerReuse() throws Exception { tezConf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0); CapturingEventHandler eventHandler = new CapturingEventHandler(); + DAG dag = mock(DAG.class); TezDAGID dagID = TezDAGID.getInstance("0", 0, 0); AMRMClient rmClientCore = new AMRMClientForTest(); @@ -1538,6 +1571,7 @@ public void testEnvironmentVarsContainerReuse() throws Exception { doReturn(amContainerMap).when(appContext).getAllContainers(); doReturn(amNodeTracker).when(appContext).getNodeTracker(); doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState(); + doReturn(dag).when(appContext).getCurrentDAG(); doReturn(dagID).when(appContext).getCurrentDAGID(); doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo(); @@ -1606,14 +1640,14 @@ eventHandler, rmClient, new ContainerContextMatcher(), taskScheduler.onContainersAllocated(Collections.singletonList(container1)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta11), any(), eq(container1)); // finish ta11, should start ta13 taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta11, container1.getId(), TaskAttemptState.SUCCEEDED, null, null, 0)); drainableAppCallback.drain(); verifyDeAllocateTask(taskScheduler, ta11, true, null, null); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(Object.class), eq(container1)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta13), any(), eq(container1)); verify(rmClient, times(0)).releaseAssignedContainer(eq(container1.getId())); eventHandler.verifyNoInvocations(AMContainerEventStopRequest.class); 
eventHandler.reset(); @@ -1631,7 +1665,7 @@ eventHandler, rmClient, new ContainerContextMatcher(), taskScheduler.onContainersAllocated(Collections.singletonList(container2)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta12), any(Object.class), eq(container2)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta12), any(), eq(container2)); // ta12 finished, cannot reuse container, should release container2 taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta12, container2.getId(), @@ -1646,7 +1680,7 @@ eventHandler, rmClient, new ContainerContextMatcher(), taskScheduler.onContainersAllocated(Collections.singletonList(container3)); TestTaskSchedulerHelpers.waitForDelayedDrainNotify(drainNotifier); drainableAppCallback.drain(); - verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta14), any(Object.class), eq(container3)); + verify(taskSchedulerManager).taskAllocated(eq(0), eq(ta14), any(), eq(container3)); // ta14 finished, should release container3 taskSchedulerManager.handleEvent(new AMSchedulerEventTAEnded(ta14, container3.getId(), diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestDagAwareYarnTaskScheduler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestDagAwareYarnTaskScheduler.java new file mode 100644 index 0000000000..1465bfaaf6 --- /dev/null +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestDagAwareYarnTaskScheduler.java @@ -0,0 +1,1746 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tez.dag.app.rm; + +import com.google.common.collect.Sets; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; +import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.NodeReport; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync; +import org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; +import org.apache.hadoop.yarn.util.resource.Resources; +import org.apache.tez.common.MockDNSToSwitchMapping; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.app.MockClock; +import org.apache.tez.dag.app.dag.Task; +import org.apache.tez.dag.app.dag.TaskAttempt; +import org.apache.tez.dag.app.rm.DagAwareYarnTaskScheduler.AMRMClientAsyncWrapper; +import org.apache.tez.dag.app.rm.DagAwareYarnTaskScheduler.HeldContainer; +import org.apache.tez.dag.app.rm.DagAwareYarnTaskScheduler.TaskRequest; +import org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerContextDrainable; +import org.apache.tez.serviceplugins.api.DagInfo; +import org.apache.tez.serviceplugins.api.TaskScheduler; +import org.apache.tez.serviceplugins.api.TaskSchedulerContext; +import org.apache.tez.serviceplugins.api.TaskSchedulerContext.AppFinalStatus; +import org.apache.tez.test.ControlledScheduledExecutorService; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.mockito.ArgumentCaptor; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; + +import static org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.createCountingExecutingService; +import static org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.setupMockTaskSchedulerContext; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyList; +import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.isNull; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TestDagAwareYarnTaskScheduler { + private ExecutorService contextCallbackExecutor; + + 
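+  // Harness notes for the tests in this class: context callbacks are routed
+  // through TaskSchedulerContextDrainable so each test can drain them
+  // deterministically, and time is driven by a MockClock so reuse and idle
+  // timeouts advance without sleeping. The typical step is:
+  //   scheduler.allocateTask(...); drainableAppCallback.drain();
+  //   verify(mockRMClient).addContainerRequest(...);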
  @BeforeClass
+  public static void beforeClass() {
+
+    MockDNSToSwitchMapping.initializeMockRackResolver();
+  }
+
+  @Before
+  public void preTest() {
+    contextCallbackExecutor = Executors.newSingleThreadExecutor(
+        new ThreadFactoryBuilder().setNameFormat("TaskSchedulerAppCallbackExecutor #%d")
+            .setDaemon(true)
+            .build());
+  }
+
+  @After
+  public void postTest() {
+    contextCallbackExecutor.shutdownNow();
+  }
+
+  private TaskSchedulerContextDrainable createDrainableContext(
+      TaskSchedulerContext taskSchedulerContext) {
+    TaskSchedulerContextImplWrapper wrapper =
+        new TaskSchedulerContextImplWrapper(taskSchedulerContext,
+            createCountingExecutingService(contextCallbackExecutor));
+    return new TaskSchedulerContextDrainable(wrapper);
+  }
+
+  @SuppressWarnings({ "unchecked" })
+  @Test(timeout=30000)
+  public void testNoReuse() throws Exception {
+    AMRMClientAsyncWrapperForTest mockRMClient = spy(new AMRMClientAsyncWrapperForTest());
+
+    String appHost = "host";
+    int appPort = 0;
+    String appUrl = "url";
+
+    Configuration conf = new Configuration();
+    conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, false);
+    conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100);
+
+    DagInfo mockDagInfo = mock(DagInfo.class);
+    when(mockDagInfo.getTotalVertices()).thenReturn(10);
+    when(mockDagInfo.getVertexDescendants(anyInt())).thenReturn(new BitSet());
+    TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf);
+    when(mockApp.getCurrentDagInfo()).thenReturn(mockDagInfo);
+    TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp);
+
+    MockClock clock = new MockClock(1000);
+    NewTaskSchedulerForTest scheduler = new NewTaskSchedulerForTest(drainableAppCallback,
+        mockRMClient, clock);
+
+    scheduler.initialize();
+    drainableAppCallback.drain();
+
+    scheduler.start();
+    drainableAppCallback.drain();
+    verify(mockRMClient).start();
+    verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl);
+    RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse();
+    verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(),
+        regResponse.getApplicationACLs(), regResponse.getClientToAMTokenMasterKey(),
+        regResponse.getQueue());
+
+    assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount());
+
+    Object mockTask1 = new MockTask("task1");
+    Object mockCookie1 = new Object();
+    Resource mockCapability = Resources.createResource(1024, 1);
+    String[] hosts = {"host1", "host5"};
+    String[] racks = {"/default-rack", "/default-rack"};
+    Priority mockPriority = Priority.newInstance(1);
+    ArgumentCaptor<TaskRequest> requestCaptor =
+        ArgumentCaptor.forClass(TaskRequest.class);
+    // allocate task
+    scheduler.allocateTask(mockTask1, mockCapability, hosts,
+        racks, mockPriority, null, mockCookie1);
+    drainableAppCallback.drain();
+    verify(mockRMClient, times(1)).
+        addContainerRequest(any());
+
+    // task deallocated before any container is allocated: only the pending
+    // request is removed, nothing is released
+    assertFalse(scheduler.deallocateTask(mockTask1, true, null, null));
+    verify(mockApp, times(0)).containerBeingReleased(any());
+    verify(mockRMClient, times(1)).
+        removeContainerRequest(any());
+    verify(mockRMClient, times(0)).
+        releaseAssignedContainer(any());
+
+    // deallocating unknown task
+    assertFalse(scheduler.deallocateTask(mockTask1, true, null, null));
+    verify(mockApp, times(0)).containerBeingReleased(any());
+    verify(mockRMClient, times(1)).
+ removeContainerRequest(any()); + verify(mockRMClient, times(0)). + releaseAssignedContainer(any()); + + // allocate tasks + Object mockTask2 = new MockTask("task2"); + Object mockCookie2 = new Object(); + Object mockTask3 = new MockTask("task3"); + Object mockCookie3 = new Object(); + scheduler.allocateTask(mockTask1, mockCapability, hosts, + racks, mockPriority, null, mockCookie1); + drainableAppCallback.drain(); + verify(mockRMClient, times(2)). + addContainerRequest(requestCaptor.capture()); + TaskRequest request1 = requestCaptor.getValue(); + scheduler.allocateTask(mockTask2, mockCapability, hosts, + racks, mockPriority, null, mockCookie2); + drainableAppCallback.drain(); + verify(mockRMClient, times(3)). + addContainerRequest(requestCaptor.capture()); + TaskRequest request2 = requestCaptor.getValue(); + scheduler.allocateTask(mockTask3, mockCapability, hosts, + racks, mockPriority, null, mockCookie3); + drainableAppCallback.drain(); + verify(mockRMClient, times(4)). + addContainerRequest(requestCaptor.capture()); + TaskRequest request3 = requestCaptor.getValue(); + + NodeId host1 = NodeId.newInstance("host1", 1); + NodeId host2 = NodeId.newInstance("host2", 2); + NodeId host3 = NodeId.newInstance("host3", 3); + NodeId host4 = NodeId.newInstance("host4", 4); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId mockCId1 = ContainerId.newContainerId(attemptId, 1); + Container mockContainer1 = Container.newInstance(mockCId1, host1, null, mockCapability, mockPriority, null); + ContainerId mockCId2 = ContainerId.newContainerId(attemptId, 2); + Container mockContainer2 = Container.newInstance(mockCId2, host2, null, mockCapability, mockPriority, null); + ContainerId mockCId3 = ContainerId.newContainerId(attemptId, 3); + Container mockContainer3 = Container.newInstance(mockCId3, host3, null, mockCapability, mockPriority, null); + ContainerId mockCId4 = ContainerId.newContainerId(attemptId, 4); + Container mockContainer4 = Container.newInstance(mockCId4, host4, null, mockCapability, mockPriority, null); + List containers = new ArrayList<>(); + containers.add(mockContainer1); + containers.add(mockContainer2); + containers.add(mockContainer3); + containers.add(mockContainer4); + scheduler.onContainersAllocated(containers); + drainableAppCallback.drain(); + // first container allocated + verify(mockApp).taskAllocated(mockTask1, mockCookie1, mockContainer1); + verify(mockApp).taskAllocated(mockTask2, mockCookie2, mockContainer2); + verify(mockApp).taskAllocated(mockTask3, mockCookie3, mockContainer3); + // no other allocations returned + verify(mockApp, times(3)).taskAllocated(any(), any(), any()); + verify(mockRMClient).removeContainerRequest(request1); + verify(mockRMClient).removeContainerRequest(request2); + verify(mockRMClient).removeContainerRequest(request3); + // verify unwanted container released + verify(mockRMClient).releaseAssignedContainer(mockCId4); + + // deallocate allocated task + assertTrue(scheduler.deallocateTask(mockTask1, true, null, null)); + drainableAppCallback.drain(); + verify(mockApp).containerBeingReleased(mockCId1); + verify(mockRMClient).releaseAssignedContainer(mockCId1); + // deallocate allocated container + assertEquals(mockTask2, scheduler.deallocateContainer(mockCId2)); + drainableAppCallback.drain(); + verify(mockRMClient).releaseAssignedContainer(mockCId2); + verify(mockRMClient, times(3)).releaseAssignedContainer(any()); + + List statuses = new ArrayList<>(); + ContainerStatus mockStatus1 = 
mock(ContainerStatus.class); + when(mockStatus1.getContainerId()).thenReturn(mockCId1); + statuses.add(mockStatus1); + ContainerStatus mockStatus2 = mock(ContainerStatus.class); + when(mockStatus2.getContainerId()).thenReturn(mockCId2); + statuses.add(mockStatus2); + ContainerStatus mockStatus3 = mock(ContainerStatus.class); + when(mockStatus3.getContainerId()).thenReturn(mockCId3); + statuses.add(mockStatus3); + ContainerStatus mockStatus4 = mock(ContainerStatus.class); + when(mockStatus4.getContainerId()).thenReturn(mockCId4); + statuses.add(mockStatus4); + + scheduler.onContainersCompleted(statuses); + drainableAppCallback.drain(); + // released container status returned + verify(mockApp).containerCompleted(mockTask1, mockStatus1); + verify(mockApp).containerCompleted(mockTask2, mockStatus2); + // currently allocated container status returned and not released + verify(mockApp).containerCompleted(mockTask3, mockStatus3); + // no other statuses returned + verify(mockApp, times(3)).containerCompleted(any(), any()); + verify(mockRMClient, times(3)).releaseAssignedContainer(any()); + + // verify blacklisting + verify(mockRMClient, times(0)).updateBlacklist(anyList(), anyList()); + String badHost = "host6"; + NodeId badNodeId = NodeId.newInstance(badHost, 1); + scheduler.blacklistNode(badNodeId); + List badNodeList = Collections.singletonList(badHost); + verify(mockRMClient, times(1)).updateBlacklist(eq(badNodeList), isNull()); + Object mockTask4 = new MockTask("task4"); + Object mockCookie4 = new Object(); + scheduler.allocateTask(mockTask4, mockCapability, null, + null, mockPriority, null, mockCookie4); + drainableAppCallback.drain(); + verify(mockRMClient, times(5)).addContainerRequest(requestCaptor.capture()); + ContainerId mockCId5 = ContainerId.newContainerId(attemptId, 5); + Container mockContainer5 = Container.newInstance(mockCId5, badNodeId, null, mockCapability, mockPriority, null); + containers.clear(); + containers.add(mockContainer5); + scheduler.onContainersAllocated(containers); + drainableAppCallback.drain(); + // no new allocation + verify(mockApp, times(3)).taskAllocated(any(), any(), any()); + // verify blacklisted container released + verify(mockRMClient).releaseAssignedContainer(mockCId5); + verify(mockRMClient, times(4)).releaseAssignedContainer(any()); + // verify request added back + verify(mockRMClient, times(6)).addContainerRequest(requestCaptor.capture()); + NodeId host6 = NodeId.newInstance("host6", 6); + ContainerId mockCId6 = ContainerId.newContainerId(attemptId, 6); + Container mockContainer6 = Container.newInstance(mockCId6, host6, null, mockCapability, mockPriority, null); + containers.clear(); + containers.add(mockContainer6); + scheduler.onContainersAllocated(containers); + drainableAppCallback.drain(); + // new allocation + verify(mockApp, times(4)).taskAllocated(any(), any(), any()); + verify(mockApp).taskAllocated(mockTask4, mockCookie4, mockContainer6); + // deallocate allocated task + assertTrue(scheduler.deallocateTask(mockTask4, true, null, null)); + drainableAppCallback.drain(); + verify(mockApp).containerBeingReleased(mockCId6); + verify(mockRMClient).releaseAssignedContainer(mockCId6); + verify(mockRMClient, times(5)).releaseAssignedContainer(any()); + // test unblacklist + scheduler.unblacklistNode(badNodeId); + verify(mockRMClient, times(1)).updateBlacklist(isNull(), eq(badNodeList)); + assertEquals(0, scheduler.getNumBlacklistedNodes()); + + float progress = 0.5f; + when(mockApp.getProgress()).thenReturn(progress); + assertEquals(progress, 
scheduler.getProgress(), 0); + + // check duplicate allocation request + scheduler.allocateTask(mockTask1, mockCapability, hosts, racks, + mockPriority, null, mockCookie1); + drainableAppCallback.drain(); + verify(mockRMClient, times(7)).addContainerRequest(any()); + verify(mockRMClient, times(6)). + removeContainerRequest(any()); + scheduler.allocateTask(mockTask1, mockCapability, hosts, racks, + mockPriority, null, mockCookie1); + drainableAppCallback.drain(); + // old request removed and new one added + verify(mockRMClient, times(7)). + removeContainerRequest(any()); + verify(mockRMClient, times(8)).addContainerRequest(any()); + assertFalse(scheduler.deallocateTask(mockTask1, true, null, null)); + + // test speculative node adjustment + String speculativeNode = "host8"; + NodeId speculativeNodeId = mock(NodeId.class); + when(speculativeNodeId.getHost()).thenReturn(speculativeNode); + TaskAttempt mockTask5 = mock(TaskAttempt.class); + Task task = mock(Task.class); + when(mockTask5.getTask()).thenReturn(task); + when(task.getNodesWithRunningAttempts()).thenReturn(Sets.newHashSet(speculativeNodeId)); + Object mockCookie5 = new Object(); + scheduler.allocateTask(mockTask5, mockCapability, hosts, racks, + mockPriority, null, mockCookie5); + drainableAppCallback.drain(); + // no new allocation + verify(mockApp, times(4)).taskAllocated(any(), any(), any()); + // verify container released + verify(mockRMClient, times(5)).releaseAssignedContainer(any()); + // verify request added back + verify(mockRMClient, times(9)).addContainerRequest(requestCaptor.capture()); + + List mockUpdatedNodes = mock(List.class); + scheduler.onNodesUpdated(mockUpdatedNodes); + drainableAppCallback.drain(); + verify(mockApp).nodesUpdated(mockUpdatedNodes); + + ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(String.class); + Exception mockException = new IOException("mockexception"); + scheduler.onError(mockException); + drainableAppCallback.drain(); + verify(mockApp) + .reportError(eq(YarnTaskSchedulerServiceError.RESOURCEMANAGER_ERROR), argumentCaptor.capture(), + any()); + assertTrue(argumentCaptor.getValue().contains("mockexception")); + + scheduler.onShutdownRequest(); + drainableAppCallback.drain(); + verify(mockApp).appShutdownRequested(); + + String appMsg = "success"; + AppFinalStatus finalStatus = + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); + scheduler.shutdown(); + drainableAppCallback.drain(); + verify(mockRMClient). 
+ unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, + appMsg, appUrl); + verify(mockRMClient).stop(); + } + + @Test(timeout=30000) + public void testSimpleReuseLocalMatching() throws Exception { + AMRMClientAsyncWrapperForTest mockRMClient = spy(new AMRMClientAsyncWrapperForTest()); + + String appHost = "host"; + int appPort = 0; + String appUrl = "url"; + + Configuration conf = new Configuration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 100); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, false); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, false); + conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100); + + DagInfo mockDagInfo = mock(DagInfo.class); + when(mockDagInfo.getTotalVertices()).thenReturn(10); + when(mockDagInfo.getVertexDescendants(anyInt())).thenReturn(new BitSet()); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + when(mockApp.getCurrentDagInfo()).thenReturn(mockDagInfo); + TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + + MockClock clock = new MockClock(1000); + NewTaskSchedulerForTest scheduler = new NewTaskSchedulerForTest(drainableAppCallback, + mockRMClient, clock); + + scheduler.initialize(); + drainableAppCallback.drain(); + + scheduler.start(); + drainableAppCallback.drain(); + verify(mockRMClient).start(); + verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl); + RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse(); + verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(), + regResponse.getApplicationACLs(), regResponse.getClientToAMTokenMasterKey(), + regResponse.getQueue()); + + assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount()); + + Priority priorityv0 = Priority.newInstance(1); + Priority priorityv1 = Priority.newInstance(2); + String[] hostsv0t0 = { "host1", "host2" }; + MockTaskInfo taskv0t0 = new MockTaskInfo("taskv0t0", priorityv0, hostsv0t0); + MockTaskInfo taskv0t1 = new MockTaskInfo("taskv0t1", priorityv0, "host3"); + MockTaskInfo taskv0t2 = new MockTaskInfo("taskv0t2", priorityv0, hostsv0t0); + MockTaskInfo taskv1t0 = new MockTaskInfo("taskv1t0", priorityv1, hostsv0t0); + MockTaskInfo taskv1t1 = new MockTaskInfo("taskv1t1", priorityv1, hostsv0t0); + + TaskRequestCaptor taskRequestCaptor = new TaskRequestCaptor(mockRMClient, + scheduler, drainableAppCallback); + TaskRequest reqv0t0 = taskRequestCaptor.scheduleTask(taskv0t0); + taskRequestCaptor.scheduleTask(taskv0t1); + TaskRequest reqv0t2 = taskRequestCaptor.scheduleTask(taskv0t2); + TaskRequest reqv1t0 = taskRequestCaptor.scheduleTask(taskv1t0); + taskRequestCaptor.scheduleTask(taskv1t1); + + NodeId host1 = NodeId.newInstance("host1", 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId cid1 = ContainerId.newContainerId(attemptId, 1); + Container container1 = Container.newInstance(cid1, host1, null, taskv0t0.capability, priorityv0, null); + + // allocate one container at v0 priority + scheduler.onContainersAllocated(Collections.singletonList(container1)); + drainableAppCallback.drain(); + verify(mockApp).taskAllocated(taskv0t0.task, taskv0t0.cookie, container1); + 
verify(mockRMClient).removeContainerRequest(reqv0t0); + + // finish v0t0 successfully, verify v0t1 is skipped and v0t2 instead is assigned to the container + assertTrue(scheduler.deallocateTask(taskv0t0.task, true, null, null)); + clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp, never()).containerBeingReleased(any()); + verify(mockRMClient, never()).releaseAssignedContainer(any()); + verify(mockApp).taskAllocated(taskv0t2.task, taskv0t2.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv0t2); + + // finish v0t2 successfully, verify v1t0 is assigned to the same container + assertTrue(scheduler.deallocateTask(taskv0t2.task, true, null, null)); + clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp, never()).containerBeingReleased(any()); + verify(mockRMClient, never()).releaseAssignedContainer(any()); + verify(mockApp).taskAllocated(taskv1t0.task, taskv1t0.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv1t0); + + // fail v1t0 and verify container is released instead of reused for v1t1 + assertTrue(scheduler.deallocateTask(taskv1t0.task, false, null, null)); + clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp).containerBeingReleased(cid1); + verify(mockRMClient).releaseAssignedContainer(cid1); + + String appMsg = "success"; + AppFinalStatus finalStatus = + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); + scheduler.shutdown(); + drainableAppCallback.drain(); + verify(mockRMClient). + unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, + appMsg, appUrl); + verify(mockRMClient).stop(); + } + + @Test(timeout=30000) + public void testSimpleReuseRackMatching() throws Exception { + AMRMClientAsyncWrapperForTest mockRMClient = spy(new AMRMClientAsyncWrapperForTest()); + + String appHost = "host"; + int appPort = 0; + String appUrl = "url"; + + Configuration conf = new Configuration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 100); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, false); + conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100); + + DagInfo mockDagInfo = mock(DagInfo.class); + when(mockDagInfo.getTotalVertices()).thenReturn(10); + when(mockDagInfo.getVertexDescendants(anyInt())).thenReturn(new BitSet()); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + when(mockApp.getCurrentDagInfo()).thenReturn(mockDagInfo); + TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + + MockClock clock = new MockClock(1000); + NewTaskSchedulerForTest scheduler = new NewTaskSchedulerForTest(drainableAppCallback, + mockRMClient, clock); + + scheduler.initialize(); + drainableAppCallback.drain(); + + scheduler.start(); + drainableAppCallback.drain(); + verify(mockRMClient).start(); + verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl); + RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse(); + verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(), + regResponse.getApplicationACLs(), regResponse.getClientToAMTokenMasterKey(), + regResponse.getQueue()); + + 
assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount()); + + Priority priorityv0 = Priority.newInstance(1); + Priority priorityv1 = Priority.newInstance(2); + String[] hostsv0t0 = { "host1", "host2" }; + MockTaskInfo taskv0t0 = new MockTaskInfo("taskv0t0", priorityv0, hostsv0t0); + MockTaskInfo taskv0t1 = new MockTaskInfo("taskv0t1", priorityv0, "host2"); + MockTaskInfo taskv0t2 = new MockTaskInfo("taskv0t2", priorityv0, "host4", "/somerack"); + MockTaskInfo taskv1t0 = new MockTaskInfo("taskv1t0", priorityv1, "host1"); + MockTaskInfo taskv1t1 = new MockTaskInfo("taskv1t1", priorityv1, "host5"); + + TaskRequestCaptor taskRequestCaptor = new TaskRequestCaptor(mockRMClient, + scheduler, drainableAppCallback); + TaskRequest reqv0t0 = taskRequestCaptor.scheduleTask(taskv0t0); + TaskRequest reqv0t1 = taskRequestCaptor.scheduleTask(taskv0t1); + taskRequestCaptor.scheduleTask(taskv0t2); + TaskRequest reqv1t0 = taskRequestCaptor.scheduleTask(taskv1t0); + taskRequestCaptor.scheduleTask(taskv1t1); + + NodeId host1 = NodeId.newInstance("host1", 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId cid1 = ContainerId.newContainerId(attemptId, 1); + Container container1 = Container.newInstance(cid1, host1, null, taskv0t0.capability, priorityv0, null); + + // allocate one container at v0 priority + scheduler.onContainersAllocated(Collections.singletonList(container1)); + drainableAppCallback.drain(); + verify(mockApp).taskAllocated(taskv0t0.task, taskv0t0.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv0t0); + + // finish v0t0 successfully, verify v0t1 is skipped and v1t0 assigned instead + // since host locality is preferred to rack locality and lower priority vertex + // is not blocked by higher priority vertex + assertTrue(scheduler.deallocateTask(taskv0t0.task, true, null, null)); + clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp, never()).containerBeingReleased(any()); + verify(mockRMClient, never()).releaseAssignedContainer(any()); + verify(mockApp).taskAllocated(taskv1t0.task, taskv1t0.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv1t0); + + // finish v1t0 successfully, verify v0t1 is assigned + assertTrue(scheduler.deallocateTask(taskv1t0.task, true, null, null)); + clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp, never()).containerBeingReleased(any()); + verify(mockRMClient, never()).releaseAssignedContainer(any()); + verify(mockApp).taskAllocated(taskv0t1.task, taskv0t1.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv0t1); + + // fail v0t1 and verify container is released instead of reused for v1t1 + assertTrue(scheduler.deallocateTask(taskv0t1.task, false, null, null)); + clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp).containerBeingReleased(cid1); + verify(mockRMClient).releaseAssignedContainer(cid1); + + String appMsg = "success"; + AppFinalStatus finalStatus = + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); + scheduler.shutdown(); + drainableAppCallback.drain(); + verify(mockRMClient). 
+ unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, + appMsg, appUrl); + verify(mockRMClient).stop(); + } + + @Test(timeout=30000) + public void testSimpleReuseAnyMatching() throws Exception { + AMRMClientAsyncWrapperForTest mockRMClient = spy(new AMRMClientAsyncWrapperForTest()); + + String appHost = "host"; + int appPort = 0; + String appUrl = "url"; + + Configuration conf = new Configuration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 100); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100); + + DagInfo mockDagInfo = mock(DagInfo.class); + when(mockDagInfo.getTotalVertices()).thenReturn(10); + when(mockDagInfo.getVertexDescendants(anyInt())).thenReturn(new BitSet()); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + when(mockApp.getCurrentDagInfo()).thenReturn(mockDagInfo); + TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + + MockClock clock = new MockClock(1000); + NewTaskSchedulerForTest scheduler = new NewTaskSchedulerForTest(drainableAppCallback, + mockRMClient, clock); + + scheduler.initialize(); + drainableAppCallback.drain(); + + scheduler.start(); + drainableAppCallback.drain(); + verify(mockRMClient).start(); + verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl); + RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse(); + verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(), + regResponse.getApplicationACLs(), regResponse.getClientToAMTokenMasterKey(), + regResponse.getQueue()); + + assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount()); + + Priority priorityv0 = Priority.newInstance(1); + Priority priorityv1 = Priority.newInstance(2); + String[] hostsv0t0 = { "host1", "host2" }; + MockTaskInfo taskv0t0 = new MockTaskInfo("taskv0t0", priorityv0, hostsv0t0); + MockTaskInfo taskv0t1 = new MockTaskInfo("taskv0t1", priorityv0, "host2"); + MockTaskInfo taskv0t2 = new MockTaskInfo("taskv0t2", priorityv0, "host4", "/rack4"); + MockTaskInfo taskv1t0 = new MockTaskInfo("taskv1t0", priorityv1, "host1"); + MockTaskInfo taskv1t1 = new MockTaskInfo("taskv1t1", priorityv1, "host6", "/rack6"); + + TaskRequestCaptor taskRequestCaptor = new TaskRequestCaptor(mockRMClient, + scheduler, drainableAppCallback); + TaskRequest reqv0t0 = taskRequestCaptor.scheduleTask(taskv0t0); + TaskRequest reqv0t1 = taskRequestCaptor.scheduleTask(taskv0t1); + TaskRequest reqv0t2 = taskRequestCaptor.scheduleTask(taskv0t2); + TaskRequest reqv1t0 = taskRequestCaptor.scheduleTask(taskv1t0); + taskRequestCaptor.scheduleTask(taskv1t1); + + NodeId host1 = NodeId.newInstance("host1", 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId cid1 = ContainerId.newContainerId(attemptId, 1); + Container container1 = Container.newInstance(cid1, host1, null, taskv0t0.capability, priorityv0, null); + + // allocate one container at v0 priority + scheduler.onContainersAllocated(Collections.singletonList(container1)); + drainableAppCallback.drain(); + verify(mockApp).taskAllocated(taskv0t0.task, taskv0t0.cookie, container1); + 
verify(mockRMClient).removeContainerRequest(reqv0t0); + + // finish v0t0 successfully, verify v0t1 is skipped and v1t0 assigned instead + // since host locality is preferred to rack locality and lower priority vertex + // is not blocked by higher priority vertex + assertTrue(scheduler.deallocateTask(taskv0t0.task, true, null, null)); + clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp, never()).containerBeingReleased(any()); + verify(mockRMClient, never()).releaseAssignedContainer(any()); + verify(mockApp).taskAllocated(taskv1t0.task, taskv1t0.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv1t0); + + // finish v1t0 successfully, verify v0t1 is assigned + assertTrue(scheduler.deallocateTask(taskv1t0.task, true, null, null)); + clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp, never()).containerBeingReleased(any()); + verify(mockRMClient, never()).releaseAssignedContainer(any()); + verify(mockApp).taskAllocated(taskv0t1.task, taskv0t1.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv0t1); + + // finish v0t1 successfully, verify v0t2 is assigned + assertTrue(scheduler.deallocateTask(taskv0t1.task, true, null, null)); + clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp, never()).containerBeingReleased(any()); + verify(mockRMClient, never()).releaseAssignedContainer(any()); + verify(mockApp).taskAllocated(taskv0t2.task, taskv0t2.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv0t2); + + // fail v0t2 and verify container is released instead of reused for v1t1 + assertTrue(scheduler.deallocateTask(taskv0t2.task, false, null, null)); + clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp).containerBeingReleased(cid1); + verify(mockRMClient).releaseAssignedContainer(cid1); + + String appMsg = "success"; + AppFinalStatus finalStatus = + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); + scheduler.shutdown(); + drainableAppCallback.drain(); + verify(mockRMClient). 
+ unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, + appMsg, appUrl); + verify(mockRMClient).stop(); + } + + @Test(timeout=30000) + public void testReuseWithAffinity() throws Exception { + AMRMClientAsyncWrapperForTest mockRMClient = spy(new AMRMClientAsyncWrapperForTest()); + + String appHost = "host"; + int appPort = 0; + String appUrl = "url"; + + Configuration conf = new Configuration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 100); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, false); + conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100); + + DagInfo mockDagInfo = mock(DagInfo.class); + when(mockDagInfo.getTotalVertices()).thenReturn(10); + when(mockDagInfo.getVertexDescendants(anyInt())).thenReturn(new BitSet()); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + when(mockApp.getCurrentDagInfo()).thenReturn(mockDagInfo); + TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + + MockClock clock = new MockClock(1000); + NewTaskSchedulerForTest scheduler = new NewTaskSchedulerForTest(drainableAppCallback, + mockRMClient, clock); + + scheduler.initialize(); + drainableAppCallback.drain(); + + scheduler.start(); + drainableAppCallback.drain(); + verify(mockRMClient).start(); + verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl); + RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse(); + verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(), + regResponse.getApplicationACLs(), regResponse.getClientToAMTokenMasterKey(), + regResponse.getQueue()); + + assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount()); + + Priority priorityv0 = Priority.newInstance(1); + Priority priorityv1 = Priority.newInstance(2); + String[] hostsv0t0 = { "host1", "host2" }; + MockTaskInfo taskv0t0 = new MockTaskInfo("taskv0t0", priorityv0, hostsv0t0); + MockTaskInfo taskv0t1 = new MockTaskInfo("taskv0t1", priorityv0, hostsv0t0); + + TaskRequestCaptor taskRequestCaptor = new TaskRequestCaptor(mockRMClient, + scheduler, drainableAppCallback); + TaskRequest reqv0t0 = taskRequestCaptor.scheduleTask(taskv0t0); + taskRequestCaptor.scheduleTask(taskv0t1); + + NodeId host1 = NodeId.newInstance("host1", 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId cid1 = ContainerId.newContainerId(attemptId, 1); + Container container1 = Container.newInstance(cid1, host1, null, taskv0t0.capability, priorityv0, null); + + // allocate one container at v0 priority + scheduler.onContainersAllocated(Collections.singletonList(container1)); + drainableAppCallback.drain(); + verify(mockApp).taskAllocated(taskv0t0.task, taskv0t0.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv0t0); + + // add a new request for this container + MockTaskInfo taskv1t0 = new MockTaskInfo("taskv1t0", priorityv1, "host1"); + TaskRequest reqv1t0 = taskRequestCaptor.scheduleTask(taskv1t0, cid1); + + // finish v0t0 successfully, verify v0t1 is skipped even though it is node-local + // and v1t0 assigned instead for affinity + assertTrue(scheduler.deallocateTask(taskv0t0.task, true, null, null)); + 
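+    // advance the mock clock well past the 100ms reuse-locality delay configured
+    // above before verifying that the held container was reassigned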
clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp, never()).containerBeingReleased(any()); + verify(mockRMClient, never()).releaseAssignedContainer(any()); + verify(mockApp).taskAllocated(taskv1t0.task, taskv1t0.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv1t0); + + String appMsg = "success"; + AppFinalStatus finalStatus = + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); + scheduler.shutdown(); + drainableAppCallback.drain(); + verify(mockRMClient). + unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, + appMsg, appUrl); + verify(mockRMClient).stop(); + } + + @Test(timeout=30000) + public void testReuseVertexDescendants() throws Exception { + AMRMClientAsyncWrapperForTest mockRMClient = spy(new AMRMClientAsyncWrapperForTest()); + + String appHost = "host"; + int appPort = 0; + String appUrl = "url"; + + Configuration conf = new Configuration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 100); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, false); + conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100); + + // vertex 0 and vertex 2 are root vertices and vertex 1 is a child of vertex 0 + DagInfo mockDagInfo = mock(DagInfo.class); + when(mockDagInfo.getTotalVertices()).thenReturn(3); + when(mockDagInfo.getVertexDescendants(0)).thenReturn(BitSet.valueOf(new long[] { 0x2 })); + when(mockDagInfo.getVertexDescendants(1)).thenReturn(new BitSet()); + when(mockDagInfo.getVertexDescendants(2)).thenReturn(new BitSet()); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + when(mockApp.getCurrentDagInfo()).thenReturn(mockDagInfo); + TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + + MockClock clock = new MockClock(1000); + NewTaskSchedulerForTest scheduler = new NewTaskSchedulerForTest(drainableAppCallback, + mockRMClient, clock); + + scheduler.initialize(); + drainableAppCallback.drain(); + + scheduler.start(); + drainableAppCallback.drain(); + verify(mockRMClient).start(); + verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl); + RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse(); + verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(), + regResponse.getApplicationACLs(), regResponse.getClientToAMTokenMasterKey(), + regResponse.getQueue()); + + assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount()); + + Priority priorityv0 = Priority.newInstance(1); + Priority priorityv1 = Priority.newInstance(2); + Priority priorityv2 = Priority.newInstance(3); + String[] hostsv0t0 = { "host1", "host2" }; + MockTaskInfo taskv0t0 = new MockTaskInfo("taskv0t0", priorityv0, hostsv0t0); + when(mockApp.getVertexIndexForTask(taskv0t0.task)).thenReturn(0); + MockTaskInfo taskv0t1 = new MockTaskInfo("taskv0t1", priorityv0, "host3"); + when(mockApp.getVertexIndexForTask(taskv0t1.task)).thenReturn(0); + MockTaskInfo taskv1t0 = new MockTaskInfo("taskv1t0", priorityv1, hostsv0t0); + when(mockApp.getVertexIndexForTask(taskv1t0.task)).thenReturn(1); + MockTaskInfo taskv2t0 = new MockTaskInfo("taskv2t0", priorityv2, hostsv0t0); + 
    when(mockApp.getVertexIndexForTask(taskv2t0.task)).thenReturn(2);
+    MockTaskInfo taskv2t1 = new MockTaskInfo("taskv2t1", priorityv2, "host3");
+    when(mockApp.getVertexIndexForTask(taskv2t1.task)).thenReturn(2);
+
+    TaskRequestCaptor taskRequestCaptor = new TaskRequestCaptor(mockRMClient,
+        scheduler, drainableAppCallback);
+    TaskRequest reqv0t0 = taskRequestCaptor.scheduleTask(taskv0t0);
+    TaskRequest reqv0t1 = taskRequestCaptor.scheduleTask(taskv0t1);
+    TaskRequest reqv1t0 = taskRequestCaptor.scheduleTask(taskv1t0);
+    TaskRequest reqv2t0 = taskRequestCaptor.scheduleTask(taskv2t0);
+    taskRequestCaptor.scheduleTask(taskv2t1);
+
+    NodeId host1 = NodeId.newInstance("host1", 1);
+    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1);
+    ContainerId cid1 = ContainerId.newContainerId(attemptId, 1);
+    Container container1 = Container.newInstance(cid1, host1, null, taskv0t0.capability, priorityv0, null);
+
+    // allocate one container at v0 priority
+    scheduler.onContainersAllocated(Collections.singletonList(container1));
+    drainableAppCallback.drain();
+    verify(mockApp).taskAllocated(taskv0t0.task, taskv0t0.cookie, container1);
+    verify(mockRMClient).removeContainerRequest(reqv0t0);
+
+    // finish v0t0 successfully, verify v1t0 is skipped and v2t0 assigned instead
+    // since host locality beats rack locality for unblocked vertex v2 and
+    // v1 is blocked by pending v0 request
+    assertTrue(scheduler.deallocateTask(taskv0t0.task, true, null, null));
+    clock.incrementTime(10000);
+    drainableAppCallback.drain();
+    verify(mockApp, never()).containerBeingReleased(any());
+    verify(mockRMClient, never()).releaseAssignedContainer(any());
+    verify(mockApp).taskAllocated(taskv2t0.task, taskv2t0.cookie, container1);
+    verify(mockRMClient).removeContainerRequest(reqv2t0);
+
+    // finish v2t0 successfully, verify v0t1 is assigned since it is higher
+    // priority than v2
+    assertTrue(scheduler.deallocateTask(taskv2t0.task, true, null, null));
+    clock.incrementTime(10000);
+    drainableAppCallback.drain();
+    verify(mockApp, never()).containerBeingReleased(any());
+    verify(mockRMClient, never()).releaseAssignedContainer(any());
+    verify(mockApp).taskAllocated(taskv0t1.task, taskv0t1.cookie, container1);
+    verify(mockRMClient).removeContainerRequest(reqv0t1);
+
+    // finish v0t1 successfully, verify v1t0 is assigned since it is now unblocked
+    assertTrue(scheduler.deallocateTask(taskv0t1.task, true, null, null));
+    clock.incrementTime(10000);
+    drainableAppCallback.drain();
+    verify(mockApp, never()).containerBeingReleased(any());
+    verify(mockRMClient, never()).releaseAssignedContainer(any());
+    verify(mockApp).taskAllocated(taskv1t0.task, taskv1t0.cookie, container1);
+    verify(mockRMClient).removeContainerRequest(reqv1t0);
+
+    // fail v1t0 and verify container is released instead of reused for v2t1
+    assertTrue(scheduler.deallocateTask(taskv1t0.task, false, null, null));
+    clock.incrementTime(10000);
+    drainableAppCallback.drain();
+    verify(mockApp).containerBeingReleased(cid1);
+    verify(mockRMClient).releaseAssignedContainer(cid1);
+
+    String appMsg = "success";
+    AppFinalStatus finalStatus =
+        new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl);
+    when(mockApp.getFinalAppStatus()).thenReturn(finalStatus);
+    scheduler.shutdown();
+    drainableAppCallback.drain();
+    verify(mockRMClient).
+ unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, + appMsg, appUrl); + verify(mockRMClient).stop(); + } + + @Test(timeout=30000) + public void testSessionContainers() throws Exception { + AMRMClientAsyncWrapperForTest mockRMClient = spy(new AMRMClientAsyncWrapperForTest()); + + String appHost = "host"; + int appPort = 0; + String appUrl = "url"; + + Configuration conf = new Configuration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 100); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, false); + conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 4000); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 5000); + conf.setInt(TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS, 5); + + DagInfo mockDagInfo = mock(DagInfo.class); + when(mockDagInfo.getTotalVertices()).thenReturn(10); + when(mockDagInfo.getVertexDescendants(anyInt())).thenReturn(new BitSet()); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + when(mockApp.getCurrentDagInfo()).thenReturn(mockDagInfo); + when(mockApp.isSession()).thenReturn(true); + TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + + MockClock clock = new MockClock(1000); + NewTaskSchedulerForTest scheduler = new NewTaskSchedulerForTest(drainableAppCallback, + mockRMClient, clock); + + scheduler.initialize(); + drainableAppCallback.drain(); + + scheduler.start(); + drainableAppCallback.drain(); + verify(mockRMClient).start(); + verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl); + RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse(); + verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(), + regResponse.getApplicationACLs(), regResponse.getClientToAMTokenMasterKey(), + regResponse.getQueue()); + + assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount()); + + final String rack1 = "/r1"; + final String rack2 = "/r2"; + final String rack3 = "/r3"; + final String node1Rack1 = "n1r1"; + final String node2Rack1 = "n2r1"; + final String node1Rack2 = "n1r2"; + final String node2Rack2 = "n2r2"; + final String node1Rack3 = "n1r3"; + MockDNSToSwitchMapping.addRackMapping(node1Rack1, rack1); + MockDNSToSwitchMapping.addRackMapping(node2Rack1, rack1); + MockDNSToSwitchMapping.addRackMapping(node1Rack2, rack2); + MockDNSToSwitchMapping.addRackMapping(node2Rack2, rack2); + MockDNSToSwitchMapping.addRackMapping(node1Rack3, rack3); + + Priority priorityv0 = Priority.newInstance(1); + MockTaskInfo taskv0t0 = new MockTaskInfo("taskv0t0", priorityv0, node1Rack1, rack1); + MockTaskInfo taskv0t1 = new MockTaskInfo("taskv0t1", priorityv0, node2Rack1, rack1); + MockTaskInfo taskv0t2 = new MockTaskInfo("taskv0t2", priorityv0, node1Rack1, rack1); + MockTaskInfo taskv0t3 = new MockTaskInfo("taskv0t3", priorityv0, node2Rack1, rack1); + MockTaskInfo taskv0t4 = new MockTaskInfo("taskv0t4", priorityv0, node1Rack2, rack2); + MockTaskInfo taskv0t5 = new MockTaskInfo("taskv0t5", priorityv0, node2Rack2, rack2); + MockTaskInfo taskv0t6 = new MockTaskInfo("taskv0t6", priorityv0, node1Rack3, rack3); + + 
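+    // Layout recap: rack1 = {n1r1, n2r1}, rack2 = {n1r2, n2r2}, rack3 = {n1r3}.
+    // Seven containers are allocated below while TEZ_AM_SESSION_MIN_HELD_CONTAINERS
+    // is 5, so idle expiration is expected to release two containers and keep the
+    // held set spread across all five nodes and all three racks (asserted after
+    // the deallocations).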
TaskRequestCaptor taskRequestCaptor = new TaskRequestCaptor(mockRMClient, + scheduler, drainableAppCallback); + TaskRequest reqv0t0 = taskRequestCaptor.scheduleTask(taskv0t0); + TaskRequest reqv0t1 = taskRequestCaptor.scheduleTask(taskv0t1); + TaskRequest reqv0t2 = taskRequestCaptor.scheduleTask(taskv0t2); + TaskRequest reqv0t3 = taskRequestCaptor.scheduleTask(taskv0t3); + TaskRequest reqv0t4 = taskRequestCaptor.scheduleTask(taskv0t4); + TaskRequest reqv0t5 = taskRequestCaptor.scheduleTask(taskv0t5); + TaskRequest reqv0t6 = taskRequestCaptor.scheduleTask(taskv0t6); + + List containers = new ArrayList<>(); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId cid1 = ContainerId.newContainerId(attemptId, 1); + NodeId n1r1 = NodeId.newInstance(node1Rack1, 1); + Container container1 = Container.newInstance(cid1, n1r1, null, taskv0t0.capability, priorityv0, null); + containers.add(container1); + ContainerId cid2 = ContainerId.newContainerId(attemptId, 2); + NodeId n2r1 = NodeId.newInstance(node2Rack1, 1); + Container container2 = Container.newInstance(cid2, n2r1, null, taskv0t1.capability, priorityv0, null); + containers.add(container2); + ContainerId cid3 = ContainerId.newContainerId(attemptId, 3); + Container container3 = Container.newInstance(cid3, n1r1, null, taskv0t2.capability, priorityv0, null); + containers.add(container3); + ContainerId cid4 = ContainerId.newContainerId(attemptId, 4); + Container container4 = Container.newInstance(cid4, n2r1, null, taskv0t3.capability, priorityv0, null); + containers.add(container4); + ContainerId cid5 = ContainerId.newContainerId(attemptId, 5); + NodeId n1r2 = NodeId.newInstance(node1Rack2, 1); + Container container5 = Container.newInstance(cid5, n1r2, null, taskv0t4.capability, priorityv0, null); + containers.add(container5); + ContainerId cid6 = ContainerId.newContainerId(attemptId, 6); + NodeId n2r2 = NodeId.newInstance(node2Rack2, 1); + Container container6 = Container.newInstance(cid6, n2r2, null, taskv0t5.capability, priorityv0, null); + containers.add(container6); + ContainerId cid7 = ContainerId.newContainerId(attemptId, 7); + NodeId n1r3 = NodeId.newInstance(node1Rack3, 1); + Container container7 = Container.newInstance(cid7, n1r3, null, taskv0t6.capability, priorityv0, null); + containers.add(container7); + + scheduler.onContainersAllocated(containers); + drainableAppCallback.drain(); + verify(mockApp).taskAllocated(taskv0t0.task, taskv0t0.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv0t0); + verify(mockApp).taskAllocated(taskv0t1.task, taskv0t1.cookie, container2); + verify(mockRMClient).removeContainerRequest(reqv0t1); + verify(mockApp).taskAllocated(taskv0t2.task, taskv0t2.cookie, container3); + verify(mockRMClient).removeContainerRequest(reqv0t2); + verify(mockApp).taskAllocated(taskv0t3.task, taskv0t3.cookie, container4); + verify(mockRMClient).removeContainerRequest(reqv0t3); + verify(mockApp).taskAllocated(taskv0t4.task, taskv0t4.cookie, container5); + verify(mockRMClient).removeContainerRequest(reqv0t4); + verify(mockApp).taskAllocated(taskv0t5.task, taskv0t5.cookie, container6); + verify(mockRMClient).removeContainerRequest(reqv0t5); + verify(mockApp).taskAllocated(taskv0t6.task, taskv0t6.cookie, container7); + verify(mockRMClient).removeContainerRequest(reqv0t6); + + clock.incrementTime(10000); + drainableAppCallback.drain(); + assertTrue(scheduler.deallocateTask(taskv0t0.task, true, null, null)); + 
assertTrue(scheduler.deallocateTask(taskv0t1.task, true, null, null)); + assertTrue(scheduler.deallocateTask(taskv0t2.task, true, null, null)); + assertTrue(scheduler.deallocateTask(taskv0t3.task, true, null, null)); + assertTrue(scheduler.deallocateTask(taskv0t4.task, true, null, null)); + assertTrue(scheduler.deallocateTask(taskv0t5.task, true, null, null)); + assertTrue(scheduler.deallocateTask(taskv0t6.task, true, null, null)); + verify(mockApp, never()).containerBeingReleased(any()); + verify(mockRMClient, never()).releaseAssignedContainer(any()); + + // verify only two of the containers were released after idle expiration + // and the rest were spread across the nodes and racks + clock.incrementTime(5000); + drainableAppCallback.drain(); + verify(mockApp, times(2)).containerBeingReleased(any()); + verify(mockRMClient, times(2)).releaseAssignedContainer(any()); + Set hosts = new HashSet<>(); + Set racks = new HashSet<>(); + for (HeldContainer hc : scheduler.getSessionContainers()) { + hosts.add(hc.getHost()); + racks.add(hc.getRack()); + } + assertEquals(5, hosts.size()); + assertEquals(3, racks.size()); + assertTrue(hosts.contains(node1Rack1)); + assertTrue(hosts.contains(node2Rack1)); + assertTrue(hosts.contains(node1Rack2)); + assertTrue(hosts.contains(node2Rack2)); + assertTrue(hosts.contains(node1Rack3)); + assertTrue(racks.contains(rack1)); + assertTrue(racks.contains(rack2)); + assertTrue(racks.contains(rack3)); + + String appMsg = "success"; + AppFinalStatus finalStatus = + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); + scheduler.shutdown(); + drainableAppCallback.drain(); + verify(mockRMClient). + unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, + appMsg, appUrl); + verify(mockRMClient).stop(); + } + + @Test(timeout=50000) + public void testPreemptionNoHeadroom() throws Exception { + AMRMClientAsyncWrapperForTest mockRMClient = spy(new AMRMClientAsyncWrapperForTest()); + + String appHost = "host"; + int appPort = 0; + String appUrl = "url"; + + Configuration conf = new Configuration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 100); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, false); + conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100); + conf.setInt(TezConfiguration.TEZ_AM_PREEMPTION_PERCENTAGE, 10); + conf.setInt(TezConfiguration.TEZ_AM_PREEMPTION_HEARTBEATS_BETWEEN_PREEMPTIONS, 3); + conf.setInt(TezConfiguration.TEZ_AM_PREEMPTION_MAX_WAIT_TIME_MS, 60 * 1000); + + // vertex 0 and vertex 2 are root vertices and vertex 1 is a child of vertex 0 + DagInfo mockDagInfo = mock(DagInfo.class); + when(mockDagInfo.getTotalVertices()).thenReturn(3); + when(mockDagInfo.getVertexDescendants(0)).thenReturn(BitSet.valueOf(new long[] { 0x2 })); + when(mockDagInfo.getVertexDescendants(1)).thenReturn(new BitSet()); + when(mockDagInfo.getVertexDescendants(2)).thenReturn(new BitSet()); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + when(mockApp.getCurrentDagInfo()).thenReturn(mockDagInfo); + TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + + MockClock clock = new MockClock(1000); + NewTaskSchedulerForTest scheduler = new 
NewTaskSchedulerForTest(drainableAppCallback, + mockRMClient, clock); + + scheduler.initialize(); + drainableAppCallback.drain(); + + scheduler.start(); + drainableAppCallback.drain(); + verify(mockRMClient).start(); + verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl); + RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse(); + verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(), + regResponse.getApplicationACLs(), regResponse.getClientToAMTokenMasterKey(), + regResponse.getQueue()); + + assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount()); + + Priority priorityv0 = Priority.newInstance(1); + Priority priorityv1 = Priority.newInstance(2); + Priority priorityv2 = Priority.newInstance(3); + String[] hostsv0t0 = { "host1", "host2" }; + MockTaskInfo taskv0t0 = new MockTaskInfo("taskv0t0", priorityv0, hostsv0t0); + when(mockApp.getVertexIndexForTask(taskv0t0.task)).thenReturn(0); + MockTaskInfo taskv0t1 = new MockTaskInfo("taskv0t1", priorityv0, hostsv0t0); + when(mockApp.getVertexIndexForTask(taskv0t1.task)).thenReturn(0); + MockTaskInfo taskv1t0 = new MockTaskInfo("taskv1t0", priorityv1, hostsv0t0); + when(mockApp.getVertexIndexForTask(taskv1t0.task)).thenReturn(1); + MockTaskInfo taskv1t1 = new MockTaskInfo("taskv1t1", priorityv1, hostsv0t0); + when(mockApp.getVertexIndexForTask(taskv1t1.task)).thenReturn(1); + MockTaskInfo taskv2t0 = new MockTaskInfo("taskv2t0", priorityv2, hostsv0t0); + when(mockApp.getVertexIndexForTask(taskv2t0.task)).thenReturn(2); + + // ask for two tasks for vertex 1 and start running one of them + TaskRequestCaptor taskRequestCaptor = new TaskRequestCaptor(mockRMClient, + scheduler, drainableAppCallback); + TaskRequest reqv1t0 = taskRequestCaptor.scheduleTask(taskv1t0); + TaskRequest reqv1t1 = taskRequestCaptor.scheduleTask(taskv1t1); + NodeId host1 = NodeId.newInstance("host1", 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId cid1 = ContainerId.newContainerId(attemptId, 1); + Container container1 = Container.newInstance(cid1, host1, null, taskv1t0.capability, priorityv1, null); + scheduler.onContainersAllocated(Collections.singletonList(container1)); + drainableAppCallback.drain(); + verify(mockApp).taskAllocated(taskv1t0.task, taskv1t0.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv1t0); + + // start running the other task for vertex 1 a bit later + clock.incrementTime(1000); + ContainerId cid2 = ContainerId.newContainerId(attemptId, 2); + Container container2 = Container.newInstance(cid2, host1, null, taskv1t0.capability, priorityv1, null); + scheduler.onContainersAllocated(Collections.singletonList(container2)); + drainableAppCallback.drain(); + verify(mockApp).taskAllocated(taskv1t1.task, taskv1t1.cookie, container2); + verify(mockRMClient).removeContainerRequest(reqv1t1); + + // add a request for vertex 0 but there is no headroom + when(mockRMClient.getAvailableResources()).thenReturn(Resources.none()); + TaskRequest reqv0t0 = taskRequestCaptor.scheduleTask(taskv0t0); + + // should preempt after enough heartbeats to get past preemption interval + // only the youngest container should be preempted to meet the demand + scheduler.getProgress(); + scheduler.getProgress(); + scheduler.getProgress(); + drainableAppCallback.drain(); + verify(mockApp, times(1)).preemptContainer(any()); + verify(mockApp).preemptContainer(cid2); +
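+ // simulate the app acting on the preemption: deallocating cid2 should hand
+ // back the preempted task (v1t1) and release only that container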
assertEquals(taskv1t1.task, scheduler.deallocateContainer(cid2)); + drainableAppCallback.drain(); + verify(mockApp).containerBeingReleased(cid2); + verify(mockRMClient).releaseAssignedContainer(cid2); + verify(mockApp, never()).containerBeingReleased(cid1); + verify(mockRMClient, never()).releaseAssignedContainer(cid1); + + // add a request for vertex 2 and allocate another container + clock.incrementTime(1000); + taskRequestCaptor.scheduleTask(taskv2t0); + ContainerId cid3 = ContainerId.newContainerId(attemptId, 3); + Container container3 = Container.newInstance(cid3, host1, null, taskv0t0.capability, priorityv0, null); + scheduler.onContainersAllocated(Collections.singletonList(container3)); + drainableAppCallback.drain(); + verify(mockApp).taskAllocated(taskv0t0.task, taskv0t0.cookie, container3); + verify(mockRMClient).removeContainerRequest(reqv0t0); + + // no more preemptions since v1 is not a descendant of v2 + scheduler.getProgress(); + scheduler.getProgress(); + scheduler.getProgress(); + drainableAppCallback.drain(); + verify(mockApp, times(1)).preemptContainer(any()); + + // adding request for v0 should trigger preemption on next heartbeat + taskRequestCaptor.scheduleTask(taskv0t1); + scheduler.getProgress(); + drainableAppCallback.drain(); + verify(mockApp, times(2)).preemptContainer(any()); + verify(mockApp).preemptContainer(cid1); + assertEquals(taskv1t0.task, scheduler.deallocateContainer(cid1)); + drainableAppCallback.drain(); + verify(mockApp).containerBeingReleased(cid1); + verify(mockRMClient).releaseAssignedContainer(cid1); + + String appMsg = "success"; + AppFinalStatus finalStatus = + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); + scheduler.shutdown(); + drainableAppCallback.drain(); + verify(mockRMClient). 
+ unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, + appMsg, appUrl); + verify(mockRMClient).stop(); + } + + @Test (timeout = 50000L) + public void testPreemptionWhenBlocked() throws Exception { + AMRMClientAsyncWrapperForTest mockRMClient = spy(new AMRMClientAsyncWrapperForTest()); + + String appHost = "host"; + int appPort = 0; + String appUrl = "url"; + + Configuration conf = new Configuration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 100); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, true); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, false); + conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100); + conf.setInt(TezConfiguration.TEZ_AM_PREEMPTION_PERCENTAGE, 10); + conf.setInt(TezConfiguration.TEZ_AM_PREEMPTION_HEARTBEATS_BETWEEN_PREEMPTIONS, 3); + conf.setInt(TezConfiguration.TEZ_AM_PREEMPTION_MAX_WAIT_TIME_MS, 60 * 1000); + + DagInfo mockDagInfo = mock(DagInfo.class); + when(mockDagInfo.getTotalVertices()).thenReturn(3); + when(mockDagInfo.getVertexDescendants(0)).thenReturn(BitSet.valueOf(new long[] { 0x6 })); + when(mockDagInfo.getVertexDescendants(1)).thenReturn(BitSet.valueOf(new long[] { 0x2 })); + when(mockDagInfo.getVertexDescendants(2)).thenReturn(new BitSet()); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + when(mockApp.getCurrentDagInfo()).thenReturn(mockDagInfo); + TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + + MockClock clock = new MockClock(1000); + NewTaskSchedulerForTest scheduler = new NewTaskSchedulerForTest(drainableAppCallback, + mockRMClient, clock); + + scheduler.initialize(); + drainableAppCallback.drain(); + + scheduler.start(); + drainableAppCallback.drain(); + verify(mockRMClient).start(); + verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl); + RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse(); + verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(), + regResponse.getApplicationACLs(), regResponse.getClientToAMTokenMasterKey(), + regResponse.getQueue()); + + assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount()); + + Priority priorityv0 = Priority.newInstance(1); + Priority priorityv2 = Priority.newInstance(3); + String[] hostsv0t0 = { "host1", "host2" }; + MockTaskInfo taskv0t0 = new MockTaskInfo("taskv0t0", priorityv0, hostsv0t0); + when(mockApp.getVertexIndexForTask(taskv0t0.task)).thenReturn(0); + MockTaskInfo taskv0t1 = new MockTaskInfo("taskv0t1", priorityv0, hostsv0t0); + when(mockApp.getVertexIndexForTask(taskv0t1.task)).thenReturn(0); + MockTaskInfo taskv2t0 = new MockTaskInfo("taskv2t0", priorityv2, hostsv0t0); + when(mockApp.getVertexIndexForTask(taskv2t0.task)).thenReturn(2); + MockTaskInfo taskv2t1 = new MockTaskInfo("taskv2t1", priorityv2, hostsv0t0); + when(mockApp.getVertexIndexForTask(taskv2t1.task)).thenReturn(2); + + // ask for one task for vertex 2 and start it running + TaskRequestCaptor taskRequestCaptor = new TaskRequestCaptor(mockRMClient, + scheduler, drainableAppCallback); + TaskRequest reqv2t0 = taskRequestCaptor.scheduleTask(taskv2t0); + NodeId host1 = NodeId.newInstance("host1", 1); + ApplicationAttemptId attemptId =
ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId cid1 = ContainerId.newContainerId(attemptId, 1); + Container container1 = Container.newInstance(cid1, host1, null, taskv2t0.capability, priorityv2, null); + scheduler.onContainersAllocated(Collections.singletonList(container1)); + drainableAppCallback.drain(); + verify(mockApp).taskAllocated(taskv2t0.task, taskv2t0.cookie, container1); + verify(mockRMClient).removeContainerRequest(reqv2t0); + clock.incrementTime(1000); + + when(mockRMClient.getAvailableResources()).thenReturn(Resources.none()); + scheduler.getProgress(); + scheduler.getProgress(); + scheduler.getProgress(); + drainableAppCallback.drain(); + // ask for another task for v2 + TaskRequest reqv2t1 = taskRequestCaptor.scheduleTask(taskv2t1); + scheduler.getProgress(); + scheduler.getProgress(); + scheduler.getProgress(); + drainableAppCallback.drain(); + + clock.incrementTime(1000); + // add a request for vertex 0 but there is no headroom; this should preempt + when(mockRMClient.getAvailableResources()).thenReturn(Resources.none()); + TaskRequest reqv0t0 = taskRequestCaptor.scheduleTask(taskv0t0); + + // should preempt after enough heartbeats to get past preemption interval + scheduler.getProgress(); + scheduler.getProgress(); + scheduler.getProgress(); + drainableAppCallback.drain(); + verify(mockApp, times(1)).preemptContainer(any()); + verify(mockApp).preemptContainer(cid1); + String appMsg = "success"; + AppFinalStatus finalStatus = + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); + scheduler.shutdown(); + drainableAppCallback.drain(); + verify(mockRMClient). + unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, + appMsg, appUrl); + verify(mockRMClient).stop(); + } + + @Test(timeout=50000) + public void testContainerAssignmentReleaseNewContainers() throws Exception { + AMRMClientAsyncWrapperForTest mockRMClient = spy(new AMRMClientAsyncWrapperForTest()); + + String appHost = "host"; + int appPort = 0; + String appUrl = "url"; + + Configuration conf = new Configuration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 100); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, false); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, false); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NEW_CONTAINERS_ENABLED, false); + conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 4000); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 5000); + conf.setInt(TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS, 5); + + DagInfo mockDagInfo = mock(DagInfo.class); + when(mockDagInfo.getTotalVertices()).thenReturn(10); + when(mockDagInfo.getVertexDescendants(anyInt())).thenReturn(new BitSet()); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + when(mockApp.getCurrentDagInfo()).thenReturn(mockDagInfo); + when(mockApp.isSession()).thenReturn(true); + TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + + MockClock clock = new MockClock(1000); + NewTaskSchedulerForTest scheduler = new NewTaskSchedulerForTest(drainableAppCallback, + mockRMClient, clock); + +
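+ // reuse of new containers is disabled above, so a freshly allocated container
+ // that matches no pending request should go straight back to the RM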
scheduler.initialize(); + drainableAppCallback.drain(); + + scheduler.start(); + drainableAppCallback.drain(); + verify(mockRMClient).start(); + verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl); + RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse(); + verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(), + regResponse.getApplicationACLs(), regResponse.getClientToAMTokenMasterKey(), + regResponse.getQueue()); + + assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount()); + + final String rack1 = "/r1"; + final String rack2 = "/r2"; + final String node1Rack1 = "n1r1"; + final String node2Rack1 = "n2r1"; + final String node1Rack2 = "n1r2"; + MockDNSToSwitchMapping.addRackMapping(node1Rack1, rack1); + MockDNSToSwitchMapping.addRackMapping(node2Rack1, rack1); + MockDNSToSwitchMapping.addRackMapping(node1Rack2, rack2); + + Priority priorityv0 = Priority.newInstance(1); + MockTaskInfo taskv0t0 = new MockTaskInfo("taskv0t0", priorityv0, node1Rack1, rack1); + + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId cid1 = ContainerId.newContainerId(attemptId, 1); + NodeId n2r1 = NodeId.newInstance(node2Rack1, 1); + Container container1 = Container.newInstance(cid1, n2r1, null, taskv0t0.capability, priorityv0, null); + + // verify the new container is released rather than immediately allocated + scheduler.onContainersAllocated(Collections.singletonList(container1)); + drainableAppCallback.drain(); + // app is not notified of the container being released since it never launched + verify(mockApp, never()).containerBeingReleased(cid1); + verify(mockRMClient).releaseAssignedContainer(eq(cid1)); + } + + @Test(timeout=50000) + public void testIdleContainerAssignmentReuseNewContainers() throws Exception { + AMRMClientAsyncWrapperForTest mockRMClient = spy(new AMRMClientAsyncWrapperForTest()); + + String appHost = "host"; + int appPort = 0; + String appUrl = "url"; + + Configuration conf = new Configuration(); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 100); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED, false); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED, false); + conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_NEW_CONTAINERS_ENABLED, true); + conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, 100); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 4000); + conf.setInt(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 5000); + conf.setInt(TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS, 5); + + DagInfo mockDagInfo = mock(DagInfo.class); + when(mockDagInfo.getTotalVertices()).thenReturn(10); + when(mockDagInfo.getVertexDescendants(anyInt())).thenReturn(new BitSet()); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + when(mockApp.getCurrentDagInfo()).thenReturn(mockDagInfo); + when(mockApp.isSession()).thenReturn(true); + TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + + MockClock clock = new MockClock(1000); + NewTaskSchedulerForTest scheduler = new NewTaskSchedulerForTest(drainableAppCallback, + mockRMClient, clock); + + scheduler.initialize(); + drainableAppCallback.drain(); + +
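+ // with reuse of new containers enabled here, an unassigned new container
+ // should be held while idle rather than released immediately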
scheduler.start(); + drainableAppCallback.drain(); + verify(mockRMClient).start(); + verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl); + RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse(); + verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(), + regResponse.getApplicationACLs(), regResponse.getClientToAMTokenMasterKey(), + regResponse.getQueue()); + + assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount()); + + final String rack1 = "/r1"; + final String rack2 = "/r2"; + final String node1Rack1 = "n1r1"; + final String node2Rack1 = "n2r1"; + final String node1Rack2 = "n1r2"; + MockDNSToSwitchMapping.addRackMapping(node1Rack1, rack1); + MockDNSToSwitchMapping.addRackMapping(node2Rack1, rack1); + MockDNSToSwitchMapping.addRackMapping(node1Rack2, rack2); + + Priority priorityv0 = Priority.newInstance(1); + MockTaskInfo taskv0t0 = new MockTaskInfo("taskv0t0", priorityv0, node1Rack1, rack1); + + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId cid1 = ContainerId.newContainerId(attemptId, 1); + NodeId n2r1 = NodeId.newInstance(node2Rack1, 1); + Container container1 = Container.newInstance(cid1, n2r1, null, taskv0t0.capability, priorityv0, null); + + // verify idle container is kept for now + scheduler.onContainersAllocated(Collections.singletonList(container1)); + clock.incrementTime(2000); + drainableAppCallback.drain(); + verify(mockApp, never()).containerBeingReleased(cid1); + verify(mockRMClient, never()).releaseAssignedContainer(cid1); + + // verify idle container is released without being assigned to a task because rack-local reuse is + // disabled + TaskRequestCaptor taskRequestCaptor = new TaskRequestCaptor(mockRMClient, + scheduler, drainableAppCallback); + TaskRequest reqv0t0 = taskRequestCaptor.scheduleTask(taskv0t0); + clock.incrementTime(10000); + drainableAppCallback.drain(); + verify(mockApp, never()).taskAllocated(taskv0t0.task, taskv0t0.cookie, container1); + verify(mockRMClient, never()).removeContainerRequest(reqv0t0); + verify(mockApp, never()).containerBeingReleased(cid1); + verify(mockRMClient).releaseAssignedContainer(cid1); + + // cancel the task request + assertFalse(scheduler.deallocateTask(taskv0t0.task, false, null, null)); + + // allocate another container that's node-local + ContainerId cid2 = ContainerId.newContainerId(attemptId, 2); + NodeId n1r1 = NodeId.newInstance(node1Rack1, 1); + Container container2 = Container.newInstance(cid2, n1r1, null, taskv0t0.capability, priorityv0, null); + scheduler.onContainersAllocated(Collections.singletonList(container2)); + clock.incrementTime(2000); + drainableAppCallback.drain(); + verify(mockApp, never()).containerBeingReleased(cid2); + verify(mockRMClient, never()).releaseAssignedContainer(cid2); + + // reschedule the task, verify it's now scheduled without a container request + // since node-local idle container is available + reqv0t0 = taskRequestCaptor.scheduleTask(taskv0t0, false); + verify(mockApp).taskAllocated(taskv0t0.task, taskv0t0.cookie, container2); + verify(mockRMClient).removeContainerRequest(reqv0t0); + } + + @Test + public void testMinMaxContainerIdleMillisAreEqual() throws Exception { + AMRMClientAsyncWrapperForTest mockRMClient = new AMRMClientAsyncWrapperForTest(); + Configuration conf = new Configuration(); + conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 10000); + 
conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 10000); + + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext("host", 0, "url", conf); + TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + MockClock clock = new MockClock(1000); + NewTaskSchedulerForTest scheduler = new NewTaskSchedulerForTest(drainableAppCallback, mockRMClient, clock); + scheduler.initialize(); + + NodeId host1 = NodeId.newInstance("host1", 1); + Container container1 = Container.newInstance(null, host1, null, null, null, null); + HeldContainer heldContainer = scheduler.new HeldContainer(container1); + long now = clock.getTime(); + assertEquals(now + 10000, heldContainer.getIdleExpirationTimestamp(now)); + } + + static class AMRMClientAsyncWrapperForTest extends AMRMClientAsyncWrapper { + AMRMClientAsyncWrapperForTest() { + super(new MockAMRMClient(), 10000, null); + } + + RegisterApplicationMasterResponse getRegistrationResponse() { + return ((MockAMRMClient) client).getRegistrationResponse(); + } + + @Override + public RegisterApplicationMasterResponse registerApplicationMaster(String appHostName, int appHostPort, + String appTrackingUrl) throws YarnException, IOException { + return client.registerApplicationMaster(appHostName, appHostPort, appTrackingUrl); + } + + @Override + protected void serviceStart() { + } + + @Override + protected void serviceStop() { + } + } + + static class MockAMRMClient extends AMRMClientImpl { + private RegisterApplicationMasterResponse mockRegResponse; + + MockAMRMClient() { + super(); + this.clusterAvailableResources = Resource.newInstance(4000, 4); + this.clusterNodeCount = 5; + } + + @Override + protected void serviceStart() { + } + + @Override + protected void serviceStop() { + } + + @Override + public RegisterApplicationMasterResponse registerApplicationMaster(String appHostName, int appHostPort, + String appTrackingUrl) { + mockRegResponse = mock(RegisterApplicationMasterResponse.class); + Resource mockMaxResource = Resources.createResource(1024*1024, 1024); + Map mockAcls = Collections.emptyMap(); + when(mockRegResponse.getMaximumResourceCapability()).thenReturn( + mockMaxResource); + when(mockRegResponse.getApplicationACLs()).thenReturn(mockAcls); + when(mockRegResponse.getSchedulerResourceTypes()).thenReturn( + EnumSet.of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU)); + return mockRegResponse; + } + + @Override + public void unregisterApplicationMaster(FinalApplicationStatus appStatus, + String appMessage, String appTrackingUrl) { + } + + RegisterApplicationMasterResponse getRegistrationResponse() { + return mockRegResponse; + } + } + + static class MockTask { + final String name; + + MockTask(String name) { + this.name = name; + } + + @Override + public String toString() { + return name; + } + } + + static class MockTaskInfo { + final static Object DEFAULT_SIGNATURE = new Object(); + + final MockTask task; + final Object cookie = new Object(); + final Object signature = DEFAULT_SIGNATURE; + final String[] hosts; + final String[] racks; + final Priority priority; + final Resource capability; + + MockTaskInfo(String name, Priority priority, String host) { + this(name, priority, host == null ? null : new String[] { host }); + } + + MockTaskInfo(String name, Priority priority, String[] hosts) { + this(name, priority, hosts, buildDefaultRacks(hosts)); + } + + MockTaskInfo(String name, Priority priority, String host, String rack) { + this(name, priority, host == null ? 
null : new String[] { host }, + rack == null ? null : new String[] { rack }); + } + + MockTaskInfo(String name, Priority priority, String[] hosts, String[] racks) { + this.task = new MockTask(name); + this.hosts = hosts; + this.racks = racks; + this.priority = priority; + this.capability = Resource.newInstance(1024, 1); + } + + static String[] buildDefaultRacks(String[] hosts) { + if (hosts == null) { + return null; + } + String[] racks = new String[hosts.length]; + Arrays.fill(racks, "/default-rack"); + return racks; + } + } + + static class TaskRequestCaptor { + final AMRMClientAsync client; + final TaskScheduler scheduler; + final TaskSchedulerContextDrainable drainableAppCallback; + final ArgumentCaptor captor = ArgumentCaptor.forClass(TaskRequest.class); + int invocationCount = 0; + + TaskRequestCaptor(AMRMClientAsync client, TaskScheduler scheduler, + TaskSchedulerContextDrainable drainableAppCallback) { + this.client = client; + this.scheduler = scheduler; + this.drainableAppCallback = drainableAppCallback; + } + + TaskRequest scheduleTask(MockTaskInfo taskInfo) throws Exception { + return scheduleTask(taskInfo, true); + } + + TaskRequest scheduleTask(MockTaskInfo taskInfo, boolean expectContainerRequest) throws Exception { + scheduler.allocateTask(taskInfo.task, taskInfo.capability, taskInfo.hosts, taskInfo.racks, + taskInfo.priority, taskInfo.signature, taskInfo.cookie); + drainableAppCallback.drain(); + if (expectContainerRequest) { + ++invocationCount; + } + verify(client, times(invocationCount)).addContainerRequest(captor.capture()); + TaskRequest request = captor.getValue(); + assertEquals(request.getTask(), taskInfo.task); + assertEquals(request.getCookie(), taskInfo.cookie); + return request; + } + + TaskRequest scheduleTask(MockTaskInfo taskInfo, ContainerId affinity) throws Exception { + scheduler.allocateTask(taskInfo.task, taskInfo.capability, affinity, taskInfo.priority, + taskInfo.signature, taskInfo.cookie); + drainableAppCallback.drain(); + verify(client, times(++invocationCount)).addContainerRequest(captor.capture()); + TaskRequest request = captor.getValue(); + assertEquals(request.getTask(), taskInfo.task); + assertEquals(request.getCookie(), taskInfo.cookie); + return request; + } + } + + static class NewTaskSchedulerForTest extends DagAwareYarnTaskScheduler { + final AMRMClientAsyncWrapper mockClient; + final MockClock clock; + + NewTaskSchedulerForTest( + TaskSchedulerContextDrainable appClient, + AMRMClientAsyncWrapper client, MockClock clock) { + super(appClient); + this.mockClient = client; + this.clock = clock; + setShouldUnregister(); + } + + @Override + public void initialize() throws Exception { + initialize(mockClient); + } + + @Override + protected ScheduledExecutorService createExecutor() { + return new ControlledScheduledExecutorService(clock); + } + + @Override + protected long now() { + return clock.getTime(); + } + } +} diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java index 2ada2f17af..d7b516add2 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskScheduler.java @@ -20,7 +20,7 @@ import java.util.HashMap; import java.util.LinkedHashMap; -import java.util.concurrent.PriorityBlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; import org.apache.hadoop.conf.Configuration; import 
org.apache.tez.serviceplugins.api.TaskSchedulerContext; @@ -29,13 +29,13 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.app.rm.LocalTaskSchedulerService.AllocatedTask; import org.apache.tez.dag.app.rm.LocalTaskSchedulerService.AsyncDelegateRequestHandler; import org.apache.tez.dag.app.rm.LocalTaskSchedulerService.LocalContainerFactory; -import org.apache.tez.dag.app.rm.LocalTaskSchedulerService.TaskRequest; +import org.apache.tez.dag.app.rm.LocalTaskSchedulerService.SchedulerRequest; public class TestLocalTaskScheduler { @@ -56,12 +56,12 @@ public void maxTasksAllocationsCannotBeExceeded() { LocalContainerFactory containerFactory = new LocalContainerFactory(appAttemptId, 1000); - HashMap taskAllocations = new LinkedHashMap(); - PriorityBlockingQueue taskRequestQueue = new PriorityBlockingQueue(); + HashMap taskAllocations = new LinkedHashMap<>(); + LinkedBlockingQueue clientRequestQueue = new LinkedBlockingQueue<>(); // Object under test AsyncDelegateRequestHandler requestHandler = - new AsyncDelegateRequestHandler(taskRequestQueue, + new AsyncDelegateRequestHandler(clientRequestQueue, containerFactory, taskAllocations, mockContext, @@ -71,17 +71,18 @@ public void maxTasksAllocationsCannotBeExceeded() { for (int i = 0; i < MAX_TASKS; i++) { Priority priority = Priority.newInstance(20); requestHandler.addAllocateTaskRequest(new Long(i), null, priority, null); - requestHandler.processRequest(); + requestHandler.dispatchRequest(); + requestHandler.allocateTask(); } // Only MAX_TASKS number of tasks should have been allocated Assert.assertEquals("Wrong number of allocate tasks", MAX_TASKS, taskAllocations.size()); - Assert.assertTrue("Another allocation should not fit", requestHandler.shouldWait()); + Assert.assertTrue("Another allocation should not fit", !requestHandler.shouldProcess()); // Deallocate down to zero for (int i = 0; i < MAX_TASKS; i++) { requestHandler.addDeallocateTaskRequest(new Long(i)); - requestHandler.processRequest(); + requestHandler.dispatchRequest(); } // All allocated tasks should have been removed diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java index 3b2de34bda..e193ee98f2 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestLocalTaskSchedulerService.java @@ -18,20 +18,25 @@ package org.apache.tez.dag.app.rm; +import java.util.BitSet; import java.util.HashMap; -import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.app.dag.Task; import org.apache.tez.dag.app.rm.TestLocalTaskSchedulerService.MockLocalTaskSchedulerSerivce.MockAsyncDelegateRequestHandler; +import 
org.apache.tez.serviceplugins.api.DagInfo; import org.apache.tez.serviceplugins.api.TaskSchedulerContext; import org.junit.Assert; import org.junit.Test; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; import static org.junit.Assert.*; import static org.mockito.Mockito.*; @@ -91,6 +96,9 @@ public void testDeallocationBeforeAllocation() throws InterruptedException { taskSchedulerService.initialize(); taskSchedulerService.start(); + // create a task that fills the task allocation queue + Task dummy_task = mock(Task.class); + taskSchedulerService.allocateTask(dummy_task, Resource.newInstance(1024, 1), null, null, Priority.newInstance(1), null, null); Task task = mock(Task.class); taskSchedulerService.allocateTask(task, Resource.newInstance(1024, 1), null, null, Priority.newInstance(1), null, null); taskSchedulerService.deallocateTask(task, false, null, null); @@ -98,10 +106,10 @@ public void testDeallocationBeforeAllocation() throws InterruptedException { taskSchedulerService.startRequestHandlerThread(); MockAsyncDelegateRequestHandler requestHandler = taskSchedulerService.getRequestHandler(); - requestHandler.drainRequest(1); + requestHandler.drainRequest(3); assertEquals(1, requestHandler.deallocateCount); // The corresponding AllocateTaskRequest will be removed, so won't been processed. - assertEquals(0, requestHandler.allocateCount); + assertEquals(1, requestHandler.allocateCount); taskSchedulerService.shutdown(); } @@ -135,6 +143,82 @@ public void testDeallocationAfterAllocation() throws InterruptedException { taskSchedulerService.shutdown(); } + @Test + public void preemptDescendantsOnly() { + + final int MAX_TASKS = 2; + TezConfiguration tezConf = new TezConfiguration(); + tezConf.setInt(TezConfiguration.TEZ_AM_INLINE_TASK_EXECUTION_MAX_TASKS, MAX_TASKS); + + ApplicationId appId = ApplicationId.newInstance(2000, 1); + ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1); + Long parentTask1 = new Long(1); + Long parentTask2 = new Long(2); + Long childTask1 = new Long(3); + Long grandchildTask1 = new Long(4); + + TaskSchedulerContext + mockContext = TestTaskSchedulerHelpers.setupMockTaskSchedulerContext("", 0, "", true, + appAttemptId, 1000l, null, tezConf); + when(mockContext.getVertexIndexForTask(parentTask1)).thenReturn(0); + when(mockContext.getVertexIndexForTask(parentTask2)).thenReturn(0); + when(mockContext.getVertexIndexForTask(childTask1)).thenReturn(1); + when(mockContext.getVertexIndexForTask(grandchildTask1)).thenReturn(2); + + DagInfo mockDagInfo = mock(DagInfo.class); + when(mockDagInfo.getTotalVertices()).thenReturn(3); + BitSet vertex1Descendants = new BitSet(); + vertex1Descendants.set(1); + vertex1Descendants.set(2); + BitSet vertex2Descendants = new BitSet(); + vertex2Descendants.set(2); + BitSet vertex3Descendants = new BitSet(); + when(mockDagInfo.getVertexDescendants(0)).thenReturn(vertex1Descendants); + when(mockDagInfo.getVertexDescendants(1)).thenReturn(vertex2Descendants); + when(mockDagInfo.getVertexDescendants(2)).thenReturn(vertex3Descendants); + when(mockContext.getCurrentDagInfo()).thenReturn(mockDagInfo); + + Priority priority1 = Priority.newInstance(1); + Priority priority2 = Priority.newInstance(2); + Priority priority3 = Priority.newInstance(3); + Priority priority4 = Priority.newInstance(4); + Resource resource = Resource.newInstance(1024, 1); + + MockLocalTaskSchedulerSerivce taskSchedulerService = new MockLocalTaskSchedulerSerivce(mockContext); + + // The mock context needs to send a
deallocate container request to the scheduler service + Answer answer = new Answer() { + @Override + public Void answer(InvocationOnMock invocation) { + ContainerId containerId = invocation.getArgument(0, ContainerId.class); + taskSchedulerService.deallocateContainer(containerId); + return null; + } + }; + doAnswer(answer).when(mockContext).preemptContainer(any()); + + taskSchedulerService.initialize(); + taskSchedulerService.start(); + taskSchedulerService.startRequestHandlerThread(); + + MockAsyncDelegateRequestHandler requestHandler = taskSchedulerService.getRequestHandler(); + taskSchedulerService.allocateTask(parentTask1, resource, null, null, priority1, null, null); + taskSchedulerService.allocateTask(childTask1, resource, null, null, priority3, null, null); + taskSchedulerService.allocateTask(grandchildTask1, resource, null, null, priority4, null, null); + requestHandler.drainRequest(3); + + // We should not preempt if we have not reached max task allocations + Assert.assertEquals("Wrong number of allocate tasks", MAX_TASKS, requestHandler.allocateCount); + Assert.assertTrue("Another allocation should not fit", !requestHandler.shouldProcess()); + + // Next task allocation should preempt + taskSchedulerService.allocateTask(parentTask2, Resource.newInstance(1024, 1), null, null, priority2, null, null); + requestHandler.drainRequest(5); + + // Exactly one descendant task should have been preempted to make room + Assert.assertEquals("Wrong number of preempted tasks", 1, requestHandler.preemptCount); + } + static class MockLocalTaskSchedulerSerivce extends LocalTaskSchedulerService { private MockAsyncDelegateRequestHandler requestHandler; @@ -170,25 +254,32 @@ static class MockAsyncDelegateRequestHandler extends AsyncDelegateRequestHandler public int allocateCount = 0; public int deallocateCount = 0; - public int processedCount =0; + public int preemptCount = 0; + public int dispatchCount = 0; MockAsyncDelegateRequestHandler( - BlockingQueue taskRequestQueue, + LinkedBlockingQueue taskRequestQueue, LocalContainerFactory localContainerFactory, - HashMap taskAllocations, + HashMap taskAllocations, TaskSchedulerContext appClientDelegate, Configuration conf) { super(taskRequestQueue, localContainerFactory, taskAllocations, appClientDelegate, conf); } @Override - void processRequest() { - super.processRequest(); - processedCount ++; + void dispatchRequest() { + super.dispatchRequest(); + dispatchCount++; + } + + @Override + void allocateTask() { + super.allocateTask(); + allocateCount++; + } public void drainRequest(int count) { - while(processedCount != count || !taskRequestQueue.isEmpty()) { + while(dispatchCount != count || !clientRequestQueue.isEmpty()) { try { Thread.sleep(100); } catch (InterruptedException e) { @@ -198,15 +289,15 @@ public void drainRequest(int count) { } @Override - void allocateTask(AllocateTaskRequest request) { - super.allocateTask(request); - allocateCount ++; + void deallocateTask(DeallocateTaskRequest request) { + super.deallocateTask(request); + deallocateCount++; + } @Override - void deallocateTask(DeallocateTaskRequest request) { - super.deallocateTask(request); - deallocateCount ++; + void preemptTask(DeallocateContainerRequest request) { + super.preemptTask(request); + preemptCount++; } } } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java index 16c560e946..6755ee6cd4 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java +++
b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskScheduler.java @@ -23,11 +23,12 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyString; -import static org.mockito.Matchers.eq; import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; @@ -60,6 +61,8 @@ import org.apache.tez.common.ContainerSignatureMatcher; import org.apache.tez.common.MockDNSToSwitchMapping; import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.app.dag.Task; +import org.apache.tez.dag.app.dag.TaskAttempt; import org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AMRMClientAsyncForTest; import org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AMRMClientForTest; import org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.AlwaysMatchesContainerMatcher; @@ -68,7 +71,6 @@ import org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerWithDrainableContext; import org.apache.tez.dag.app.rm.YarnTaskSchedulerService.CookieContainerRequest; import org.apache.tez.dag.app.rm.YarnTaskSchedulerService.HeldContainer; -import org.apache.tez.serviceplugins.api.DagInfo; import org.apache.tez.serviceplugins.api.TaskSchedulerContext; import org.apache.tez.serviceplugins.api.TaskSchedulerContext.AppFinalStatus; import org.junit.After; @@ -77,7 +79,6 @@ import org.junit.BeforeClass; import org.junit.Test; import org.mockito.ArgumentCaptor; -import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -89,6 +90,10 @@ public class TestTaskScheduler { static ContainerSignatureMatcher containerSignatureMatcher = new AlwaysMatchesContainerMatcher(); private ExecutorService contextCallbackExecutor; + private static final String DEFAULT_APP_HOST = "host"; + private static final String DEFAULT_APP_URL = "url"; + private static final String SUCCEED_APP_MESSAGE = "success"; + private static final int DEFAULT_APP_PORT = 0; @BeforeClass public static void beforeClass() { @@ -122,16 +127,12 @@ public void testTaskSchedulerNoReuse() throws Exception { AMRMClientAsyncForTest mockRMClient = spy( new AMRMClientAsyncForTest(new AMRMClientForTest(), 100)); - String appHost = "host"; - int appPort = 0; - String appUrl = "url"; - Configuration conf = new Configuration(); conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, false); int interval = 100; conf.setInt(TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX, interval); - TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, DEFAULT_APP_URL, conf); TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); TaskSchedulerWithDrainableContext scheduler = @@ -146,7 +147,7 @@ public void testTaskSchedulerNoReuse() throws Exception { scheduler.start(); drainableAppCallback.drain(); verify(mockRMClient).start(); - verify(mockRMClient).registerApplicationMaster(appHost, appPort, appUrl); + 
verify(mockRMClient).registerApplicationMaster(DEFAULT_APP_HOST, DEFAULT_APP_PORT, DEFAULT_APP_URL); RegisterApplicationMasterResponse regResponse = mockRMClient.getRegistrationResponse(); verify(mockApp).setApplicationRegistrationData(regResponse.getMaximumResourceCapability(), regResponse.getApplicationACLs(), @@ -155,12 +156,12 @@ public void testTaskSchedulerNoReuse() throws Exception { Assert.assertEquals(scheduler.getClusterNodeCount(), mockRMClient.getClusterNodeCount()); - Object mockTask1 = mock(Object.class); - Object mockCookie1 = mock(Object.class); - Resource mockCapability = mock(Resource.class); + Object mockTask1 = new MockTask("task1"); + Object mockCookie1 = new Object(); + Resource mockCapability = Resource.newInstance(1024, 1); String[] hosts = {"host1", "host5"}; String[] racks = {"/default-rack", "/default-rack"}; - Priority mockPriority = mock(Priority.class); + Priority mockPriority = Priority.newInstance(1); ArgumentCaptor requestCaptor = ArgumentCaptor.forClass(CookieContainerRequest.class); // allocate task @@ -172,7 +173,7 @@ public void testTaskSchedulerNoReuse() throws Exception { // returned from task requests before allocation happens assertFalse(scheduler.deallocateTask(mockTask1, true, null, null)); - verify(mockApp, times(0)).containerBeingReleased(any(ContainerId.class)); + verify(mockApp, times(0)).containerBeingReleased(any()); verify(mockRMClient, times(1)). removeContainerRequest((CookieContainerRequest) any()); verify(mockRMClient, times(0)). @@ -180,17 +181,17 @@ public void testTaskSchedulerNoReuse() throws Exception { // deallocating unknown task assertFalse(scheduler.deallocateTask(mockTask1, true, null, null)); - verify(mockApp, times(0)).containerBeingReleased(any(ContainerId.class)); + verify(mockApp, times(0)).containerBeingReleased(any()); verify(mockRMClient, times(1)). removeContainerRequest((CookieContainerRequest) any()); verify(mockRMClient, times(0)). 
releaseAssignedContainer((ContainerId) any()); // allocate tasks - Object mockTask2 = mock(Object.class); - Object mockCookie2 = mock(Object.class); - Object mockTask3 = mock(Object.class); - Object mockCookie3 = mock(Object.class); + Object mockTask2 = new MockTask("task2"); + Object mockCookie2 = new Object(); + Object mockTask3 = new MockTask("task3"); + Object mockCookie3 = new Object(); scheduler.allocateTask(mockTask1, mockCapability, hosts, racks, mockPriority, null, mockCookie1); drainableAppCallback.drain(); @@ -210,26 +211,23 @@ public void testTaskSchedulerNoReuse() throws Exception { addContainerRequest(requestCaptor.capture()); CookieContainerRequest request3 = requestCaptor.getValue(); - List containers = new ArrayList(); - Container mockContainer1 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer1.getNodeId().getHost()).thenReturn("host1"); - ContainerId mockCId1 = mock(ContainerId.class); - when(mockContainer1.getId()).thenReturn(mockCId1); + NodeId host1 = NodeId.newInstance("host1", 1); + NodeId host2 = NodeId.newInstance("host2", 2); + NodeId host3 = NodeId.newInstance("host3", 3); + NodeId host4 = NodeId.newInstance("host4", 4); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId mockCId1 = ContainerId.newContainerId(attemptId, 1); + Container mockContainer1 = Container.newInstance(mockCId1, host1, null, mockCapability, mockPriority, null); + ContainerId mockCId2 = ContainerId.newContainerId(attemptId, 2); + Container mockContainer2 = Container.newInstance(mockCId2, host2, null, mockCapability, mockPriority, null); + ContainerId mockCId3 = ContainerId.newContainerId(attemptId, 3); + Container mockContainer3 = Container.newInstance(mockCId3, host3, null, mockCapability, mockPriority, null); + ContainerId mockCId4 = ContainerId.newContainerId(attemptId, 4); + Container mockContainer4 = Container.newInstance(mockCId4, host4, null, mockCapability, mockPriority, null); + List containers = new ArrayList<>(); containers.add(mockContainer1); - Container mockContainer2 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer2.getNodeId().getHost()).thenReturn("host2"); - ContainerId mockCId2 = mock(ContainerId.class); - when(mockContainer2.getId()).thenReturn(mockCId2); containers.add(mockContainer2); - Container mockContainer3 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer3.getNodeId().getHost()).thenReturn("host3"); - ContainerId mockCId3 = mock(ContainerId.class); - when(mockContainer3.getId()).thenReturn(mockCId3); containers.add(mockContainer3); - Container mockContainer4 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer4.getNodeId().getHost()).thenReturn("host4"); - ContainerId mockCId4 = mock(ContainerId.class); - when(mockContainer4.getId()).thenReturn(mockCId4); containers.add(mockContainer4); scheduler.onContainersAllocated(containers); drainableAppCallback.drain(); @@ -284,21 +282,17 @@ public void testTaskSchedulerNoReuse() throws Exception { // verify blacklisting verify(mockRMClient, times(0)).addNodeToBlacklist((NodeId)any()); String badHost = "host6"; - NodeId badNodeId = mock(NodeId.class); - when(badNodeId.getHost()).thenReturn(badHost); + NodeId badNodeId = NodeId.newInstance(badHost, 1); scheduler.blacklistNode(badNodeId); verify(mockRMClient, times(1)).addNodeToBlacklist(badNodeId); - Object mockTask4 = mock(Object.class); - Object mockCookie4 = mock(Object.class); + Object mockTask4 = new MockTask("task4"); + Object mockCookie4 = 
new Object(); scheduler.allocateTask(mockTask4, mockCapability, null, null, mockPriority, null, mockCookie4); drainableAppCallback.drain(); verify(mockRMClient, times(5)).addContainerRequest(requestCaptor.capture()); - Container mockContainer5 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer5.getNodeId().getHost()).thenReturn(badHost); - when(mockContainer5.getNodeId()).thenReturn(badNodeId); - ContainerId mockCId5 = mock(ContainerId.class); - when(mockContainer5.getId()).thenReturn(mockCId5); + ContainerId mockCId5 = ContainerId.newContainerId(attemptId, 5); + Container mockContainer5 = Container.newInstance(mockCId5, badNodeId, null, mockCapability, mockPriority, null); containers.clear(); containers.add(mockContainer5); scheduler.onContainersAllocated(containers); @@ -310,10 +304,9 @@ public void testTaskSchedulerNoReuse() throws Exception { verify(mockRMClient, times(4)).releaseAssignedContainer((ContainerId) any()); // verify request added back verify(mockRMClient, times(6)).addContainerRequest(requestCaptor.capture()); - Container mockContainer6 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer6.getNodeId().getHost()).thenReturn("host7"); - ContainerId mockCId6 = mock(ContainerId.class); - when(mockContainer6.getId()).thenReturn(mockCId6); + ContainerId mockCId6 = ContainerId.newContainerId(attemptId, 6); + NodeId host7 = NodeId.newInstance("host7", 7); + Container mockContainer6 = Container.newInstance(mockCId6, host7, null, mockCapability, mockPriority, null); containers.clear(); containers.add(mockContainer6); scheduler.onContainersAllocated(containers); @@ -354,6 +347,25 @@ public void testTaskSchedulerNoReuse() throws Exception { (CookieContainerRequest) any()); assertFalse(scheduler.deallocateTask(mockTask1, true, null, null)); + // test speculative node adjustment + String speculativeNode = "host8"; + NodeId speculativeNodeId = mock(NodeId.class); + when(speculativeNodeId.getHost()).thenReturn(speculativeNode); + TaskAttempt mockTask5 = mock(TaskAttempt.class); + Task task = mock(Task.class); + when(mockTask5.getTask()).thenReturn(task); + when(task.getNodesWithRunningAttempts()).thenReturn(Sets.newHashSet(speculativeNodeId)); + Object mockCookie5 = new Object(); + scheduler.allocateTask(mockTask5, mockCapability, hosts, racks, + mockPriority, null, mockCookie5); + drainableAppCallback.drain(); + // no new allocation + verify(mockApp, times(4)).taskAllocated(any(), any(), (Container) any()); + // verify container released + verify(mockRMClient, times(5)).releaseAssignedContainer((ContainerId) any()); + // verify request added back + verify(mockRMClient, times(9)).addContainerRequest(requestCaptor.capture()); + List mockUpdatedNodes = mock(List.class); scheduler.onNodesUpdated(mockUpdatedNodes); drainableAppCallback.drain(); @@ -365,30 +377,26 @@ public void testTaskSchedulerNoReuse() throws Exception { drainableAppCallback.drain(); verify(mockApp) .reportError(eq(YarnTaskSchedulerServiceError.RESOURCEMANAGER_ERROR), argumentCaptor.capture(), - any(DagInfo.class)); + any()); assertTrue(argumentCaptor.getValue().contains("mockexception")); scheduler.onShutdownRequest(); drainableAppCallback.drain(); verify(mockApp).appShutdownRequested(); - String appMsg = "success"; AppFinalStatus finalStatus = - new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, SUCCEED_APP_MESSAGE, DEFAULT_APP_URL); when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); scheduler.shutdown(); 
drainableAppCallback.drain(); verify(mockRMClient). unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, - appMsg, appUrl); + SUCCEED_APP_MESSAGE, DEFAULT_APP_URL); verify(mockRMClient).stop(); } @Test(timeout=10000) public void testTaskSchedulerInitiateStop() throws Exception { - String appHost = "host"; - int appPort = 0; - String appUrl = "url"; Configuration conf = new Configuration(); conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0); @@ -396,7 +404,7 @@ public void testTaskSchedulerInitiateStop() throws Exception { conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 10000); conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 10000); - TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, DEFAULT_APP_URL, conf); final TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); TezAMRMClientAsync mockRMClient = spy( @@ -411,21 +419,19 @@ public void testTaskSchedulerInitiateStop() throws Exception { scheduler.start(); drainableAppCallback.drain(); - Object mockTask1 = mock(Object.class); - when(mockTask1.toString()).thenReturn("task1"); - Object mockCookie1 = mock(Object.class); - Resource mockCapability = mock(Resource.class); + Object mockTask1 = new MockTask("task1"); + Object mockCookie1 = new Object(); + Resource mockCapability = Resource.newInstance(1024, 1); String[] hosts = {"host1", "host5"}; String[] racks = {"/default-rack", "/default-rack"}; final Priority mockPriority1 = Priority.newInstance(1); final Priority mockPriority2 = Priority.newInstance(2); final Priority mockPriority3 = Priority.newInstance(3); - Object mockTask2 = mock(Object.class); - when(mockTask2.toString()).thenReturn("task2"); - Object mockCookie2 = mock(Object.class); - Object mockTask3 = mock(Object.class); - when(mockTask3.toString()).thenReturn("task3"); - Object mockCookie3 = mock(Object.class); + Priority mockPriority = Priority.newInstance(1); + Object mockTask2 = new MockTask("task2"); + Object mockCookie2 = new Object(); + Object mockTask3 = new MockTask("task3"); + Object mockCookie3 = new Object(); ArgumentCaptor requestCaptor = ArgumentCaptor.forClass(CookieContainerRequest.class); @@ -450,21 +456,14 @@ public void testTaskSchedulerInitiateStop() throws Exception { List containers = new ArrayList(); // sending lower priority container first to make sure its not matched - Container mockContainer1 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer1.getNodeId().getHost()).thenReturn("host1"); - when(mockContainer1.getPriority()).thenReturn(mockPriority1); - when(mockContainer1.toString()).thenReturn("container1"); - ContainerId mockCId1 = mock(ContainerId.class); - when(mockContainer1.getId()).thenReturn(mockCId1); - when(mockCId1.toString()).thenReturn("container1"); + NodeId host1 = NodeId.newInstance("host1", 1); + NodeId host2 = NodeId.newInstance("host2", 2); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId mockCId1 = ContainerId.newContainerId(attemptId, 1); + Container mockContainer1 = Container.newInstance(mockCId1, host1, null, mockCapability, mockPriority, null); + ContainerId mockCId2 = ContainerId.newContainerId(attemptId, 2); + Container mockContainer2 = Container.newInstance(mockCId2, host2, null, mockCapability, mockPriority, null); 
containers.add(mockContainer1); - Container mockContainer2 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer2.getNodeId().getHost()).thenReturn("host2"); - when(mockContainer2.getPriority()).thenReturn(mockPriority2); - when(mockContainer2.toString()).thenReturn("container2"); - ContainerId mockCId2 = mock(ContainerId.class); - when(mockContainer2.getId()).thenReturn(mockCId2); - when(mockCId2.toString()).thenReturn("container2"); containers.add(mockContainer2); ArrayList hostContainers = @@ -506,10 +505,6 @@ public void testTaskSchedulerWithReuse() throws Exception { TezAMRMClientAsync mockRMClient = spy( new AMRMClientAsyncForTest(new AMRMClientForTest(), 100)); - String appHost = "host"; - int appPort = 0; - String appUrl = "url"; - Configuration conf = new Configuration(); // to match all in the same pass conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0); @@ -517,7 +512,7 @@ public void testTaskSchedulerWithReuse() throws Exception { conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 0); conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0); - TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, DEFAULT_APP_URL, conf); final TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); TaskSchedulerWithDrainableContext scheduler = @@ -529,10 +524,9 @@ public void testTaskSchedulerWithReuse() throws Exception { scheduler.start(); drainableAppCallback.drain(); - Object mockTask1 = mock(Object.class); - when(mockTask1.toString()).thenReturn("task1"); - Object mockCookie1 = mock(Object.class); - Resource mockCapability = mock(Resource.class); + Object mockTask1 = new MockTask("task1"); + Object mockCookie1 = new Object(); + Resource mockCapability = Resource.newInstance(1024, 1); String[] hosts = {"host1", "host5"}; String[] racks = {"/default-rack", "/default-rack"}; final Priority mockPriority1 = Priority.newInstance(1); @@ -540,12 +534,10 @@ public void testTaskSchedulerWithReuse() throws Exception { final Priority mockPriority3 = Priority.newInstance(3); final Priority mockPriority4 = Priority.newInstance(4); final Priority mockPriority5 = Priority.newInstance(5); - Object mockTask2 = mock(Object.class); - when(mockTask2.toString()).thenReturn("task2"); - Object mockCookie2 = mock(Object.class); - Object mockTask3 = mock(Object.class); - when(mockTask3.toString()).thenReturn("task3"); - Object mockCookie3 = mock(Object.class); + Object mockTask2 = new MockTask("task2"); + Object mockCookie2 = new Object(); + Object mockTask3 = new MockTask("task3"); + Object mockCookie3 = new Object(); ArgumentCaptor requestCaptor = ArgumentCaptor.forClass(CookieContainerRequest.class); @@ -568,39 +560,24 @@ public void testTaskSchedulerWithReuse() throws Exception { addContainerRequest(requestCaptor.capture()); CookieContainerRequest request3 = requestCaptor.getValue(); - List containers = new ArrayList(); + NodeId host1 = NodeId.newInstance("host1", 1); + NodeId host2 = NodeId.newInstance("host2", 2); + NodeId host3 = NodeId.newInstance("host3", 3); + NodeId host4 = NodeId.newInstance("host4", 4); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId mockCId1 = ContainerId.newContainerId(attemptId, 1); + Container mockContainer1 = Container.newInstance(mockCId1, 
host1, null, mockCapability, mockPriority1, null); + ContainerId mockCId2 = ContainerId.newContainerId(attemptId, 2); + Container mockContainer2 = Container.newInstance(mockCId2, host2, null, mockCapability, mockPriority2, null); + ContainerId mockCId3 = ContainerId.newContainerId(attemptId, 3); + Container mockContainer3 = Container.newInstance(mockCId3, host3, null, mockCapability, mockPriority3, null); + ContainerId mockCId4 = ContainerId.newContainerId(attemptId, 4); + Container mockContainer4 = Container.newInstance(mockCId4, host4, null, mockCapability, mockPriority4, null); // sending lower priority container first to make sure its not matched - Container mockContainer4 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer4.getNodeId().getHost()).thenReturn("host4"); - when(mockContainer4.toString()).thenReturn("container4"); - when(mockContainer4.getPriority()).thenReturn(mockPriority4); - ContainerId mockCId4 = mock(ContainerId.class); - when(mockContainer4.getId()).thenReturn(mockCId4); - when(mockCId4.toString()).thenReturn("container4"); + List containers = new ArrayList(); containers.add(mockContainer4); - Container mockContainer1 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer1.getNodeId().getHost()).thenReturn("host1"); - when(mockContainer1.getPriority()).thenReturn(mockPriority1); - when(mockContainer1.toString()).thenReturn("container1"); - ContainerId mockCId1 = mock(ContainerId.class); - when(mockContainer1.getId()).thenReturn(mockCId1); - when(mockCId1.toString()).thenReturn("container1"); containers.add(mockContainer1); - Container mockContainer2 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer2.getNodeId().getHost()).thenReturn("host2"); - when(mockContainer2.getPriority()).thenReturn(mockPriority2); - when(mockContainer2.toString()).thenReturn("container2"); - ContainerId mockCId2 = mock(ContainerId.class); - when(mockContainer2.getId()).thenReturn(mockCId2); - when(mockCId2.toString()).thenReturn("container2"); containers.add(mockContainer2); - Container mockContainer3 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer3.getNodeId().getHost()).thenReturn("host3"); - when(mockContainer3.getPriority()).thenReturn(mockPriority3); - when(mockContainer3.toString()).thenReturn("container3"); - ContainerId mockCId3 = mock(ContainerId.class); - when(mockContainer3.getId()).thenReturn(mockCId3); - when(mockCId3.toString()).thenReturn("container3"); containers.add(mockContainer3); AtomicBoolean drainNotifier = new AtomicBoolean(false); @@ -660,25 +637,17 @@ public void testTaskSchedulerWithReuse() throws Exception { // verify blacklisting verify(mockRMClient, times(0)).addNodeToBlacklist((NodeId)any()); String badHost = "host6"; - NodeId badNodeId = mock(NodeId.class); - when(badNodeId.getHost()).thenReturn(badHost); + NodeId badNodeId = NodeId.newInstance(badHost, 1); scheduler.blacklistNode(badNodeId); verify(mockRMClient, times(1)).addNodeToBlacklist(badNodeId); - Object mockTask4 = mock(Object.class); - when(mockTask4.toString()).thenReturn("task4"); - Object mockCookie4 = mock(Object.class); + Object mockTask4 = new MockTask("task4"); + Object mockCookie4 = new Object(); scheduler.allocateTask(mockTask4, mockCapability, null, null, mockPriority4, null, mockCookie4); drainableAppCallback.drain(); verify(mockRMClient, times(4)).addContainerRequest(requestCaptor.capture()); - Container mockContainer5 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer5.getNodeId().getHost()).thenReturn(badHost); - 
when(mockContainer5.getNodeId()).thenReturn(badNodeId); - ContainerId mockCId5 = mock(ContainerId.class); - when(mockContainer5.toString()).thenReturn("container5"); - when(mockCId5.toString()).thenReturn("container5"); - when(mockContainer5.getId()).thenReturn(mockCId5); - when(mockContainer5.getPriority()).thenReturn(mockPriority4); + ContainerId mockCId5 = ContainerId.newContainerId(attemptId, 5); + Container mockContainer5 = Container.newInstance(mockCId5, badNodeId, null, mockCapability, mockPriority4, null); containers.clear(); containers.add(mockContainer5); drainNotifier.set(false); @@ -692,12 +661,9 @@ public void testTaskSchedulerWithReuse() throws Exception { verify(mockRMClient, times(4)).releaseAssignedContainer((ContainerId) any()); // verify request added back verify(mockRMClient, times(5)).addContainerRequest(requestCaptor.capture()); - Container mockContainer6 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer6.getNodeId().getHost()).thenReturn("host7"); - ContainerId mockCId6 = mock(ContainerId.class); - when(mockContainer6.getId()).thenReturn(mockCId6); - when(mockContainer6.toString()).thenReturn("container6"); - when(mockCId6.toString()).thenReturn("container6"); + NodeId host7 = NodeId.newInstance("host7", 7); + ContainerId mockCId6 = ContainerId.newContainerId(attemptId, 6); + Container mockContainer6 = Container.newInstance(mockCId6, host7, null, mockCapability, mockPriority4, null); containers.clear(); containers.add(mockContainer6); drainNotifier.set(false); @@ -720,9 +686,8 @@ public void testTaskSchedulerWithReuse() throws Exception { // verify container level matching // add a dummy task to prevent release of allocated containers - Object mockTask5 = mock(Object.class); - when(mockTask5.toString()).thenReturn("task5"); - Object mockCookie5 = mock(Object.class); + Object mockTask5 = new MockTask("task5"); + Object mockCookie5 = new Object(); scheduler.allocateTask(mockTask5, mockCapability, hosts, racks, mockPriority5, null, mockCookie5); verify(mockRMClient, times(6)).addContainerRequest(requestCaptor.capture()); @@ -730,29 +695,19 @@ public void testTaskSchedulerWithReuse() throws Exception { // add containers so that we can reference one of them for container specific // allocation containers.clear(); - Container mockContainer7 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer7.getNodeId().getHost()).thenReturn("host5"); - ContainerId mockCId7 = mock(ContainerId.class); - when(mockContainer7.toString()).thenReturn("container7"); - when(mockCId7.toString()).thenReturn("container7"); - when(mockContainer7.getId()).thenReturn(mockCId7); - when(mockContainer7.getPriority()).thenReturn(mockPriority5); + NodeId host5 = NodeId.newInstance("host5", 5); + ContainerId mockCId7 = ContainerId.newContainerId(attemptId, 7); + Container mockContainer7 = Container.newInstance(mockCId7, host5, null, mockCapability, mockPriority5, null); containers.add(mockContainer7); - Container mockContainer8 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer8.getNodeId().getHost()).thenReturn("host5"); - ContainerId mockCId8 = mock(ContainerId.class); - when(mockContainer8.toString()).thenReturn("container8"); - when(mockCId8.toString()).thenReturn("container8"); - when(mockContainer8.getId()).thenReturn(mockCId8); - when(mockContainer8.getPriority()).thenReturn(mockPriority5); + ContainerId mockCId8 = ContainerId.newContainerId(attemptId, 8); + Container mockContainer8 = Container.newInstance(mockCId8, host5, null, mockCapability, 
mockPriority5, null); containers.add(mockContainer8); drainNotifier.set(false); scheduler.onContainersAllocated(containers); drainableAppCallback.drain(); verify(mockRMClient, times(5)).releaseAssignedContainer((ContainerId) any()); - Object mockTask6 = mock(Object.class); - when(mockTask6.toString()).thenReturn("task6"); - Object mockCookie6 = mock(Object.class); + Object mockTask6 = new MockTask("task6"); + Object mockCookie6 = new Object(); // allocate request with container affinity scheduler.allocateTask(mockTask6, mockCapability, mockCId7, mockPriority5, null, mockCookie6); drainableAppCallback.drain(); @@ -788,22 +743,21 @@ public void testTaskSchedulerWithReuse() throws Exception { scheduler.onError(mockException); drainableAppCallback.drain(); verify(mockApp).reportError(eq(YarnTaskSchedulerServiceError.RESOURCEMANAGER_ERROR), argumentCaptor.capture(), - any(DagInfo.class)); + any()); assertTrue(argumentCaptor.getValue().contains("mockexception")); scheduler.onShutdownRequest(); drainableAppCallback.drain(); verify(mockApp).appShutdownRequested(); - String appMsg = "success"; AppFinalStatus finalStatus = - new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, SUCCEED_APP_MESSAGE, DEFAULT_APP_URL); when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); scheduler.shutdown(); drainableAppCallback.drain(); verify(mockRMClient). unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, - appMsg, appUrl); + SUCCEED_APP_MESSAGE, DEFAULT_APP_URL); verify(mockRMClient).stop(); } @@ -812,12 +766,8 @@ public void testTaskSchedulerDetermineMinHeldContainers() throws Exception { TezAMRMClientAsync mockRMClient = spy( new AMRMClientAsyncForTest(new AMRMClientForTest(), 100)); - String appHost = "host"; - int appPort = 0; - String appUrl = "url"; - - TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, true, - new Configuration()); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, DEFAULT_APP_URL, + true, new Configuration()); final TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); TaskSchedulerWithDrainableContext scheduler = @@ -836,69 +786,49 @@ public void testTaskSchedulerDetermineMinHeldContainers() throws Exception { String node1Rack3 = "n1r3"; ApplicationAttemptId appId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 0), 0); + NodeId emptyHost = NodeId.newInstance("", 1); Resource r = Resource.newInstance(0, 0); ContainerId mockCId1 = ContainerId.newInstance(appId, 0); - Container c1 = mock(Container.class, RETURNS_DEEP_STUBS); - when(c1.getNodeId().getHost()).thenReturn(""); // we are mocking directly - HeldContainer hc1 = Mockito.spy(new HeldContainer(c1, 0, 0, null, containerSignatureMatcher)); + Container c1 = Container.newInstance(mockCId1, emptyHost, null, r, null, null); + HeldContainer hc1 = spy(new HeldContainer(c1, 0, 0, null, containerSignatureMatcher)); when(hc1.getNode()).thenReturn(node1Rack1); when(hc1.getRack()).thenReturn(rack1); - when(c1.getId()).thenReturn(mockCId1); - when(c1.getResource()).thenReturn(r); when(hc1.getContainer()).thenReturn(c1); ContainerId mockCId2 = ContainerId.newInstance(appId, 1); - Container c2 = mock(Container.class, RETURNS_DEEP_STUBS); - when(c2.getNodeId().getHost()).thenReturn(""); // we are mocking directly - HeldContainer hc2 = Mockito.spy(new HeldContainer(c2, 0, 0, null, containerSignatureMatcher)); + Container c2 = 
Container.newInstance(mockCId2, emptyHost, null, r, null, null); + HeldContainer hc2 = spy(new HeldContainer(c2, 0, 0, null, containerSignatureMatcher)); when(hc2.getNode()).thenReturn(node2Rack1); when(hc2.getRack()).thenReturn(rack1); - when(c2.getId()).thenReturn(mockCId2); - when(c2.getResource()).thenReturn(r); when(hc2.getContainer()).thenReturn(c2); ContainerId mockCId3 = ContainerId.newInstance(appId, 2); - Container c3 = mock(Container.class, RETURNS_DEEP_STUBS); - when(c3.getNodeId().getHost()).thenReturn(""); // we are mocking directly - HeldContainer hc3 = Mockito.spy(new HeldContainer(c3, 0, 0, null, containerSignatureMatcher)); + Container c3 = Container.newInstance(mockCId3, emptyHost, null, r, null, null); + HeldContainer hc3 = spy(new HeldContainer(c3, 0, 0, null, containerSignatureMatcher)); when(hc3.getNode()).thenReturn(node1Rack1); when(hc3.getRack()).thenReturn(rack1); - when(c3.getId()).thenReturn(mockCId3); - when(c3.getResource()).thenReturn(r); when(hc3.getContainer()).thenReturn(c3); ContainerId mockCId4 = ContainerId.newInstance(appId, 3); - Container c4 = mock(Container.class, RETURNS_DEEP_STUBS); - when(c4.getNodeId().getHost()).thenReturn(""); // we are mocking directly - HeldContainer hc4 = Mockito.spy(new HeldContainer(c4, 0, 0, null, containerSignatureMatcher)); + Container c4 = Container.newInstance(mockCId4, emptyHost, null, r, null, null); + HeldContainer hc4 = spy(new HeldContainer(c4, 0, 0, null, containerSignatureMatcher)); when(hc4.getNode()).thenReturn(node2Rack1); when(hc4.getRack()).thenReturn(rack1); - when(c4.getId()).thenReturn(mockCId4); - when(c4.getResource()).thenReturn(r); when(hc4.getContainer()).thenReturn(c4); ContainerId mockCId5 = ContainerId.newInstance(appId, 4); - Container c5 = mock(Container.class, RETURNS_DEEP_STUBS); - when(c5.getNodeId().getHost()).thenReturn(""); // we are mocking directly - HeldContainer hc5 = Mockito.spy(new HeldContainer(c5, 0, 0, null, containerSignatureMatcher)); + Container c5 = Container.newInstance(mockCId5, emptyHost, null, r, null, null); + HeldContainer hc5 = spy(new HeldContainer(c5, 0, 0, null, containerSignatureMatcher)); when(hc5.getNode()).thenReturn(node1Rack2); when(hc5.getRack()).thenReturn(rack2); - when(c5.getId()).thenReturn(mockCId5); - when(c5.getResource()).thenReturn(r); when(hc5.getContainer()).thenReturn(c5); ContainerId mockCId6 = ContainerId.newInstance(appId, 5); - Container c6 = mock(Container.class, RETURNS_DEEP_STUBS); - when(c6.getNodeId().getHost()).thenReturn(""); // we are mocking directly - HeldContainer hc6 = Mockito.spy(new HeldContainer(c6, 0, 0, null, containerSignatureMatcher)); + Container c6 = Container.newInstance(mockCId6, emptyHost, null, r, null, null); + HeldContainer hc6 = spy(new HeldContainer(c6, 0, 0, null, containerSignatureMatcher)); when(hc6.getNode()).thenReturn(node2Rack2); when(hc6.getRack()).thenReturn(rack2); - when(c6.getId()).thenReturn(mockCId6); - when(c6.getResource()).thenReturn(r); when(hc6.getContainer()).thenReturn(c6); ContainerId mockCId7 = ContainerId.newInstance(appId, 6); - Container c7 = mock(Container.class, RETURNS_DEEP_STUBS); - when(c7.getNodeId().getHost()).thenReturn(""); // we are mocking directly - HeldContainer hc7 = Mockito.spy(new HeldContainer(c7, 0, 0, null, containerSignatureMatcher)); + Container c7 = Container.newInstance(mockCId7, emptyHost, null, r, null, null); + HeldContainer hc7 = spy(new HeldContainer(c7, 0, 0, null, containerSignatureMatcher)); when(hc7.getNode()).thenReturn(node1Rack3); 
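Note: each of the seven held containers in this test follows the same shape: a real Container wrapped in a spied HeldContainer, with only the topology lookups stubbed so determineMinHeldContainers can spread its selection across nodes and racks. The repeated pattern, assuming the HeldContainer constructor arguments as used above:

    Container c = Container.newInstance(ContainerId.newInstance(appId, 0),
        NodeId.newInstance("", 1), null, Resource.newInstance(0, 0), null, null);
    HeldContainer hc = spy(new HeldContainer(c, 0, 0, null, containerSignatureMatcher));
    when(hc.getNode()).thenReturn("n1r1");  // node name the scheduler groups by
    when(hc.getRack()).thenReturn("r1");    // rack name the scheduler groups by
    when(hc.getContainer()).thenReturn(c);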
when(hc7.getRack()).thenReturn(rack3); - when(c7.getId()).thenReturn(mockCId7); - when(c7.getResource()).thenReturn(r); when(hc7.getContainer()).thenReturn(c7); scheduler.heldContainers.put(mockCId1, hc1); @@ -959,9 +889,46 @@ public void testTaskSchedulerDetermineMinHeldContainers() throws Exception { verify(mockRMClient, times(2)).releaseAssignedContainer((ContainerId)any()); Assert.assertEquals(5, scheduler.heldContainers.size()); - String appMsg = "success"; AppFinalStatus finalStatus = - new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, SUCCEED_APP_MESSAGE, DEFAULT_APP_URL); + when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); + scheduler.shutdown(); + } + + @Test (timeout=3000) + public void testTaskSchedulerHeldContainersReleaseAfterExpired() throws Exception { + final TezAMRMClientAsync mockRMClient = spy( + new AMRMClientAsyncForTest(new AMRMClientForTest(), 100)); + final TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, + DEFAULT_APP_URL, true, new Configuration()); + final TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + final TaskSchedulerWithDrainableContext scheduler = + new TaskSchedulerWithDrainableContext(drainableAppCallback, mockRMClient); + + scheduler.initialize(); + scheduler.start(); + + Resource mockCapability = Resource.newInstance(1024, 1); + NodeId emptyHost = NodeId.newInstance("", 1); + ApplicationAttemptId appId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 0), 0); + ContainerId containerId = ContainerId.newInstance(appId, 0); + Container c1 = Container.newInstance(containerId, emptyHost, null, mockCapability, null, null); + + HeldContainer hc1 = new HeldContainer(c1, -1, -1, null, containerSignatureMatcher); + + // containerExpiryTime = 0 + scheduler.heldContainers.put(containerId, hc1); + + long currTime = System.currentTimeMillis(); + scheduler.delayedContainerManager.addDelayedContainer(hc1.getContainer(), currTime); + // sleep and wait for mainLoop() check-in to release this expired held container. 
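Note: the HeldContainer above is constructed with -1 for both timeout arguments, so its expiry time is already in the past when it reaches the delayed container manager, and the main loop should release it on its first wakeup, well within the one-second sleep below. Roughly the check involved (the accessor and release-method names are assumptions for illustration):

    // Illustrative only: an unassigned held container whose expiry time has
    // passed is handed back to YARN by the delayed-container main loop.
    if (heldContainer.getContainerExpiryTime() <= System.currentTimeMillis()) {
      releaseUnassignedContainers(
          Collections.singletonList(heldContainer.getContainer()));  // name assumed
    }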
+ Thread.sleep(1000); + + verify(mockRMClient, times(1)).releaseAssignedContainer((ContainerId)any()); + Assert.assertEquals(0, scheduler.heldContainers.size()); + + AppFinalStatus finalStatus = + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, SUCCEED_APP_MESSAGE, DEFAULT_APP_URL); when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); scheduler.shutdown(); } @@ -971,10 +938,6 @@ public void testTaskSchedulerRandomReuseExpireTime() throws Exception { TezAMRMClientAsync mockRMClient = spy( new AMRMClientAsyncForTest(new AMRMClientForTest(), 100)); - String appHost = "host"; - int appPort = 0; - String appUrl = "url"; - long minTime = 1000l; long maxTime = 100000l; Configuration conf1 = new Configuration(); @@ -985,8 +948,8 @@ public void testTaskSchedulerRandomReuseExpireTime() throws Exception { conf2.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, minTime); conf2.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, maxTime); - TaskSchedulerContext mockApp1 = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf1); - TaskSchedulerContext mockApp2 = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf2); + TaskSchedulerContext mockApp1 = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, DEFAULT_APP_URL, conf1); + TaskSchedulerContext mockApp2 = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, DEFAULT_APP_URL, conf2); final TaskSchedulerContextDrainable drainableAppCallback1 = createDrainableContext(mockApp1); final TaskSchedulerContextDrainable drainableAppCallback2 = createDrainableContext(mockApp2); @@ -1018,31 +981,127 @@ public void testTaskSchedulerRandomReuseExpireTime() throws Exception { lastExpireTime = currExpireTime; } - String appMsg = "success"; AppFinalStatus finalStatus = - new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, appMsg, appUrl); + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, SUCCEED_APP_MESSAGE, DEFAULT_APP_URL); when(mockApp1.getFinalAppStatus()).thenReturn(finalStatus); when(mockApp2.getFinalAppStatus()).thenReturn(finalStatus); scheduler1.shutdown(); scheduler2.shutdown(); } + @Test(timeout = 5000) + public void testTaskSchedulerPreemptionWithLowAndHighPriorityRequests() throws Exception { + TezAMRMClientAsync mockRMClient = spy( + new AMRMClientAsyncForTest(new AMRMClientForTest(), 100)); + + Configuration conf = new Configuration(); + conf.setInt(TezConfiguration.TEZ_AM_PREEMPTION_PERCENTAGE, 50); + + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, DEFAULT_APP_URL, + false, null, null, new PreemptionMatcher(), conf); + final TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); + + final TaskSchedulerWithDrainableContext scheduler = + new TaskSchedulerWithDrainableContext(drainableAppCallback, mockRMClient); + scheduler.initialize(); + scheduler.start(); + int initialRmCapacity = 4; + int lowPriorityTasks = 5; + int highPriorityTasks = 6; + Resource taskAsk = Resource.newInstance(1000, 1); + + Resource totalResource = Resource.newInstance(4000, 4); + when(mockRMClient.getAvailableResources()).thenReturn(totalResource); + + // Add lower priority tasks + Priority lowPriority = Priority.newInstance(74); + for (int i = 0; i < lowPriorityTasks; i++) { + Object low = new Object(); + TaskAttempt ta = mock(TaskAttempt.class); + scheduler.allocateTask(ta, taskAsk, null, null, lowPriority, low, null); + } + + scheduler.getProgress(); // Will update the highest priority + 
drainableAppCallback.drain(); + // 5 containers requested for lower priority tasks + verify(mockRMClient, times(5)).addContainerRequest(any(CookieContainerRequest.class)); + + // Allocate requested containers + List lowPriorityContainers = new ArrayList<>(); + for (int i = 0; i < initialRmCapacity; i++) { + ContainerId containerId = ContainerId.newContainerId( + ApplicationAttemptId.newInstance(ApplicationId.newInstance(1L, 1), 1), i); + NodeId nodeId = NodeId.newInstance("host-" + i, 8041); + Container container = Container.newInstance(containerId, nodeId, "host-" + i, taskAsk, lowPriority, null); + lowPriorityContainers.add(container); + } + + totalResource = Resource.newInstance(0, 0); + when(mockRMClient.getAvailableResources()).thenReturn(totalResource); + + // We don't want containers to be assigned to a task by delayedContainerManager as it invokes another preemption + // flow. Delayed thread first takes lock on delayedContainerManager instance to check if there are any containers + // We block the thread, ensure all delayed containers have schedule time beyond test's runtime to avoid assignment. + synchronized (scheduler.delayedContainerManager) { + scheduler.onContainersAllocated(lowPriorityContainers); + drainableAppCallback.drain(); + for (HeldContainer container : scheduler.delayedContainerManager.delayedContainers) { + // Set next schedule beyond this test's time to avoid any assignment + container.setNextScheduleTime(System.currentTimeMillis() + 10000); + // No preemption if assignment attempt of new container < 3 + container.incrementAssignmentAttempts(); + container.incrementAssignmentAttempts(); + container.incrementAssignmentAttempts(); + } + } + + // No releases so far + verify(mockRMClient, times(0)).releaseAssignedContainer(any()); + + // Add higher priority task + Priority highPriority = Priority.newInstance(71); + for (int i = 0; i < highPriorityTasks; i++) { + Object high = new Object(); + TaskAttempt ta = mock(TaskAttempt.class); + scheduler.allocateTask(ta, taskAsk, null, null, highPriority, high, null); + } + + drainableAppCallback.drain(); + // low priority tasks + high priority tasks + verify(mockRMClient, times(11)).addContainerRequest(any(CookieContainerRequest.class)); + + // Trigger preemption to release containers as 50% of pending high priority requests + scheduler.getProgress(); + drainableAppCallback.drain(); + + // 50% of 6 high priority requests = 3, 4 containers were held - hence 3 will be released + verify(mockRMClient, times(3)).releaseAssignedContainer(any()); + + // Trigger another preemption cycle + scheduler.getProgress(); + drainableAppCallback.drain(); + // 50% of 6 high priority requests = 3, but only 1 container is held - which will be released, + // incrementing total to 4 + verify(mockRMClient, times(4)).releaseAssignedContainer(any()); + AppFinalStatus finalStatus = + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, "", DEFAULT_APP_URL); + when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); + scheduler.shutdown(); + drainableAppCallback.drain(); + } + @SuppressWarnings({ "unchecked", "rawtypes" }) @Test (timeout=5000) public void testTaskSchedulerPreemption() throws Exception { TezAMRMClientAsync mockRMClient = mock(TezAMRMClientAsync.class); - String appHost = "host"; - int appPort = 0; - String appUrl = "url"; - Configuration conf = new Configuration(); conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, false); conf.setInt(TezConfiguration.TEZ_AM_PREEMPTION_HEARTBEATS_BETWEEN_PREEMPTIONS, 3); - 
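Note: the release counts asserted in testTaskSchedulerPreemptionWithLowAndHighPriorityRequests above follow directly from TEZ_AM_PREEMPTION_PERCENTAGE; a worked version of the arithmetic with the test's own numbers:

    int preemptionPercentage = 50;  // TEZ_AM_PREEMPTION_PERCENTAGE
    int pendingHighPriority = 6;    // unsatisfied high-priority asks
    int heldContainers = 4;         // low-priority containers currently held
    int target = (int) Math.ceil(pendingHighPriority * preemptionPercentage / 100.0);  // 3
    int firstCycle = Math.min(target, heldContainers);                // 3 released
    int secondCycle = Math.min(target, heldContainers - firstCycle);  // 1 more, total 4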
TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, false, - null, null, new PreemptionMatcher(), conf); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, DEFAULT_APP_URL, + false, null, null, new PreemptionMatcher(), conf); final TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); final TaskSchedulerWithDrainableContext scheduler = @@ -1067,14 +1126,14 @@ public void testTaskSchedulerPreemption() throws Exception { verify(mockRMClient, times(0)).releaseAssignedContainer((ContainerId)any()); // allocate task - Object mockTask1 = mock(Object.class); - Object mockTask2 = mock(Object.class); - Object mockTask3 = mock(Object.class); - Object mockTask3Wait = mock(Object.class); - Object mockTask3Retry = mock(Object.class); - Object mockTask3KillA = mock(Object.class); - Object mockTask3KillB = mock(Object.class); - Object mockTaskPri8 = mock(Object.class); + Object mockTask1 = new MockTask("task1"); + Object mockTask2 = new MockTask("task2"); + Object mockTask3 = new MockTask("task3"); + Object mockTask3Wait = new MockTask("task3Wait"); + Object mockTask3Retry = new MockTask("task3Retry"); + Object mockTask3KillA = new MockTask("task3KillA"); + Object mockTask3KillB = new MockTask("task3KillB"); + Object mockTaskPri8 = new MockTask("taskPri8"); Object obj3 = new Object(); Priority pri2 = Priority.newInstance(2); Priority pri4 = Priority.newInstance(4); @@ -1129,35 +1188,21 @@ public void testTaskSchedulerPreemption() throws Exception { new LinkedList>(); anyList.add(anyContainers); + NodeId host1 = NodeId.newInstance("host1", 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId mockCId1 = ContainerId.newContainerId(attemptId, 1); + Container mockContainer1 = Container.newInstance(mockCId1, host1, null, taskAsk, pri2, null); + ContainerId mockCId2 = ContainerId.newContainerId(attemptId, 2); + Container mockContainer2 = Container.newInstance(mockCId2, host1, null, taskAsk, pri6, null); + ContainerId mockCId3 = ContainerId.newContainerId(attemptId, 3); + Container mockContainer3 = Container.newInstance(mockCId3, host1, null, taskAsk, pri6, null); + ContainerId mockCId4 = ContainerId.newContainerId(attemptId, 4); + Container mockContainer4 = Container.newInstance(mockCId4, host1, null, taskAsk, pri2, null); List containers = new ArrayList(); - Container mockContainer1 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer1.getNodeId().getHost()).thenReturn("host1"); - when(mockContainer1.getResource()).thenReturn(taskAsk); - when(mockContainer1.getPriority()).thenReturn(pri2); - ContainerId mockCId1 = mock(ContainerId.class); - when(mockContainer1.getId()).thenReturn(mockCId1); containers.add(mockContainer1); - Container mockContainer2 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer2.getNodeId().getHost()).thenReturn("host1"); - when(mockContainer2.getResource()).thenReturn(taskAsk); - when(mockContainer2.getPriority()).thenReturn(pri6); - ContainerId mockCId2 = mock(ContainerId.class); - when(mockContainer2.getId()).thenReturn(mockCId2); containers.add(mockContainer2); - Container mockContainer3A = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer3A.getNodeId().getHost()).thenReturn("host1"); - when(mockContainer3A.getResource()).thenReturn(taskAsk); - when(mockContainer3A.getPriority()).thenReturn(pri6); - ContainerId mockCId3A = mock(ContainerId.class); - 
when(mockContainer3A.getId()).thenReturn(mockCId3A); - containers.add(mockContainer3A); - Container mockContainer3B = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer3B.getNodeId().getHost()).thenReturn("host1"); - when(mockContainer3B.getResource()).thenReturn(taskAsk); - when(mockContainer3B.getPriority()).thenReturn(pri2); // high priority container - ContainerId mockCId3B = mock(ContainerId.class); - when(mockContainer3B.getId()).thenReturn(mockCId3B); - containers.add(mockContainer3B); + containers.add(mockContainer3); + containers.add(mockContainer4); when( mockRMClient.getMatchingRequests((Priority) any(), eq("host1"), (Resource) any())).thenAnswer( @@ -1198,7 +1243,7 @@ public List> answer( }); - Mockito.doAnswer(new Answer() { + doAnswer(new Answer() { public Object answer(InvocationOnMock invocation) { Object[] args = invocation.getArguments(); ContainerId cId = (ContainerId) args[0]; @@ -1215,11 +1260,11 @@ public Object answer(InvocationOnMock invocation) { scheduler.taskAllocations.get(mockTask1).getId()); Assert.assertEquals(mockCId2, scheduler.taskAllocations.get(mockTask3).getId()); - Assert.assertEquals(mockCId3A, + Assert.assertEquals(mockCId3, scheduler.taskAllocations.get(mockTask3KillA).getId()); // high priority container assigned to lower pri task. This task should still be preempted // because the task priority is relevant for preemption and not the container priority - Assert.assertEquals(mockCId3B, + Assert.assertEquals(mockCId4, scheduler.taskAllocations.get(mockTask3KillB).getId()); // no preemption @@ -1242,19 +1287,15 @@ public Object answer(InvocationOnMock invocation) { drainableAppCallback.drain(); verify(mockRMClient, times(6)).addContainerRequest(requestCaptor.capture()); verify(mockRMClient, times(0)).releaseAssignedContainer((ContainerId)any()); - - Container mockContainer4 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer4.getNodeId().getHost()).thenReturn("host1"); - when(mockContainer4.getResource()).thenReturn(taskAsk); - when(mockContainer4.getPriority()).thenReturn(pri8); - ContainerId mockCId4 = mock(ContainerId.class); - when(mockContainer4.getId()).thenReturn(mockCId4); + + ContainerId mockCId5 = ContainerId.newContainerId(attemptId, 5); + Container mockContainer5 = Container.newInstance(mockCId5, host1, null, taskAsk, pri8, null); containers.clear(); - containers.add(mockContainer4); + containers.add(mockContainer5); // new lower pri container added that wont be matched and eventually preempted // Fudge new container being present in delayed allocation list due to race - HeldContainer heldContainer = new HeldContainer(mockContainer4, -1, -1, null, + HeldContainer heldContainer = new HeldContainer(mockContainer5, -1, -1, null, containerSignatureMatcher); scheduler.delayedContainerManager.delayedContainers.add(heldContainer); // no preemption - container assignment attempts < 3 @@ -1275,7 +1316,7 @@ public Object answer(InvocationOnMock invocation) { scheduler.getProgress(); drainableAppCallback.drain(); verify(mockRMClient, times(1)).releaseAssignedContainer((ContainerId)any()); - verify(mockRMClient, times(1)).releaseAssignedContainer(mockCId4); + verify(mockRMClient, times(1)).releaseAssignedContainer(mockCId5); // internally re-request pri8 task request because we release pri8 new container verify(mockRMClient, times(7)).addContainerRequest(requestCaptor.capture()); CookieContainerRequest reAdded = requestCaptor.getValue(); @@ -1318,7 +1359,7 @@ public Object answer(InvocationOnMock invocation) { 
scheduler.getProgress(); // third heartbeat drainableAppCallback.drain(); verify(mockRMClient, times(2)).releaseAssignedContainer((ContainerId)any()); - verify(mockRMClient, times(1)).releaseAssignedContainer(mockCId3B); + verify(mockRMClient, times(1)).releaseAssignedContainer(mockCId4); Assert.assertEquals(scheduler.numHeartbeats, scheduler.heartbeatAtLastPreemption); // there are pending preemptions. scheduler.getProgress(); // first heartbeat @@ -1328,10 +1369,10 @@ public Object answer(InvocationOnMock invocation) { drainableAppCallback.drain(); // Next oldest mockTaskPri3KillA gets preempted to clear 10% of outstanding running preemptable tasks verify(mockRMClient, times(3)).releaseAssignedContainer((ContainerId)any()); - verify(mockRMClient, times(1)).releaseAssignedContainer(mockCId3A); + verify(mockRMClient, times(1)).releaseAssignedContainer(mockCId3); AppFinalStatus finalStatus = - new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, "", appUrl); + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, "", DEFAULT_APP_URL); when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); scheduler.shutdown(); drainableAppCallback.drain(); @@ -1342,10 +1383,6 @@ public void testTaskSchedulerPreemption2() throws Exception { TezAMRMClientAsync mockRMClient = spy( new AMRMClientAsyncForTest(new AMRMClientForTest(), 100)); - String appHost = "host"; - int appPort = 0; - String appUrl = "url"; - int waitTime = 1000; Configuration conf = new Configuration(); @@ -1353,8 +1390,8 @@ public void testTaskSchedulerPreemption2() throws Exception { conf.setInt(TezConfiguration.TEZ_AM_PREEMPTION_HEARTBEATS_BETWEEN_PREEMPTIONS, 2); conf.setInt(TezConfiguration.TEZ_AM_PREEMPTION_MAX_WAIT_TIME_MS, waitTime); - TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, false, - null, null, new PreemptionMatcher(), conf); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, DEFAULT_APP_URL, + false, null, null, new PreemptionMatcher(), conf); final TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); final TaskSchedulerWithDrainableContext scheduler = @@ -1371,9 +1408,9 @@ public void testTaskSchedulerPreemption2() throws Exception { verify(mockRMClient, times(0)).releaseAssignedContainer((ContainerId)any()); // allocate task - Object mockTask1 = mock(Object.class); - Object mockTask2 = mock(Object.class); - Object mockTask3 = mock(Object.class); + Object mockTask1 = new MockTask("task1"); + Object mockTask2 = new MockTask("task2"); + Object mockTask3 = new MockTask("task3"); Object obj3 = new Object(); Priority pri2 = Priority.newInstance(2); Priority pri4 = Priority.newInstance(4); @@ -1398,16 +1435,14 @@ public void testTaskSchedulerPreemption2() throws Exception { Assert.assertEquals(totalResource, scheduler.getTotalResources()); verify(mockRMClient, times(0)).releaseAssignedContainer((ContainerId)any()); + NodeId host1 = NodeId.newInstance("host1", 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId mockCId1 = ContainerId.newContainerId(attemptId, 1); + Container mockContainer1 = Container.newInstance(mockCId1, host1, null, taskAsk, pri4, null); List containers = new ArrayList(); - Container mockContainer1 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer1.getNodeId().getHost()).thenReturn("host1"); - when(mockContainer1.getResource()).thenReturn(taskAsk); - when(mockContainer1.getPriority()).thenReturn(pri4); - 
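Note: both preemption tests pace their assertions on heartbeat count: each scheduler.getProgress() call counts as one heartbeat, and a new preemption round is only attempted once the configured gap has elapsed (TEZ_AM_PREEMPTION_HEARTBEATS_BETWEEN_PREEMPTIONS: 3 in the test above, 2 in the one below). A sketch of that gate, using the numHeartbeats and heartbeatAtLastPreemption fields the assertions above read:

    // Sketch of the pacing gate exercised by the repeated getProgress() calls.
    if (numHeartbeats - heartbeatAtLastPreemption >= heartbeatsBetweenPreemptions) {
      preemptIfNeeded();  // method name illustrative; may release held containers
      heartbeatAtLastPreemption = numHeartbeats;
    }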
ContainerId mockCId1 = mock(ContainerId.class); - when(mockContainer1.getId()).thenReturn(mockCId1); containers.add(mockContainer1); - Mockito.doAnswer(new Answer() { + doAnswer(new Answer() { public Object answer(InvocationOnMock invocation) { Object[] args = invocation.getArguments(); ContainerId cId = (ContainerId) args[0]; @@ -1491,7 +1526,7 @@ public Object answer(InvocationOnMock invocation) { Assert.assertTrue(oldStartWaitTime < scheduler.highestWaitingRequestWaitStartTime); AppFinalStatus finalStatus = - new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, "", appUrl); + new AppFinalStatus(FinalApplicationStatus.SUCCEEDED, "", DEFAULT_APP_URL); when(mockApp.getFinalAppStatus()).thenReturn(finalStatus); scheduler.shutdown(); drainableAppCallback.drain(); @@ -1505,7 +1540,7 @@ public void testLocalityMatching() throws Exception { Configuration conf = new Configuration(); conf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, false); - TaskSchedulerContext appClient = setupMockTaskSchedulerContext("host", 0, "", conf); + TaskSchedulerContext appClient = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, "", conf); final TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(appClient); TaskSchedulerWithDrainableContext taskScheduler = @@ -1523,12 +1558,12 @@ public void testLocalityMatching() throws Exception { String defaultRack[] = { "/default-rack" }; String otherRack[] = { "/other-rack" }; - Object mockTask1 = mock(Object.class); + Object mockTask1 = new MockTask("task1"); CookieContainerRequest mockCookie1 = mock(CookieContainerRequest.class, RETURNS_DEEP_STUBS); when(mockCookie1.getCookie().getTask()).thenReturn(mockTask1); - Object mockTask2 = mock(Object.class); + Object mockTask2 = new MockTask("task2"); CookieContainerRequest mockCookie2 = mock(CookieContainerRequest.class, RETURNS_DEEP_STUBS); when(mockCookie2.getCookie().getTask()).thenReturn(mockTask2); @@ -1611,10 +1646,6 @@ public void testContainerExpired() throws Exception { TezAMRMClientAsync mockRMClient = spy( new AMRMClientAsyncForTest(new AMRMClientForTest(), 100)); - String appHost = "host"; - int appPort = 0; - String appUrl = "url"; - Configuration conf = new Configuration(); // to match all in the same pass conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS, 0); @@ -1622,7 +1653,7 @@ public void testContainerExpired() throws Exception { conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS, 0); conf.setLong(TezConfiguration.TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS, 0); - TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(appHost, appPort, appUrl, conf); + TaskSchedulerContext mockApp = setupMockTaskSchedulerContext(DEFAULT_APP_HOST, DEFAULT_APP_PORT, DEFAULT_APP_URL, conf); final TaskSchedulerContextDrainable drainableAppCallback = createDrainableContext(mockApp); TaskSchedulerWithDrainableContext scheduler = new TaskSchedulerWithDrainableContext(drainableAppCallback, mockRMClient); @@ -1631,17 +1662,15 @@ public void testContainerExpired() throws Exception { scheduler.start(); drainableAppCallback.drain(); - Object mockTask1 = mock(Object.class); - when(mockTask1.toString()).thenReturn("task1"); - Object mockCookie1 = mock(Object.class); - Resource mockCapability = mock(Resource.class); + Object mockTask1 = new MockTask("task1"); + Object mockCookie1 = new Object(); + Resource mockCapability = Resource.newInstance(1024, 1); String[] hosts = {"host1", "host5"}; String[] racks = 
{"/default-rack", "/default-rack"}; final Priority mockPriority1 = Priority.newInstance(1); final Priority mockPriority2 = Priority.newInstance(2); - Object mockTask2 = mock(Object.class); - when(mockTask2.toString()).thenReturn("task2"); - Object mockCookie2 = mock(Object.class); + Object mockTask2 = new MockTask("task2"); + Object mockCookie2 = new Object(); ArgumentCaptor requestCaptor = ArgumentCaptor.forClass(CookieContainerRequest.class); @@ -1660,13 +1689,10 @@ public void testContainerExpired() throws Exception { List containers = new ArrayList(); // sending only lower priority container to make sure its not matched - Container mockContainer2 = mock(Container.class, RETURNS_DEEP_STUBS); - when(mockContainer2.getNodeId().getHost()).thenReturn("host2"); - when(mockContainer2.getPriority()).thenReturn(mockPriority2); - when(mockContainer2.toString()).thenReturn("container2"); - ContainerId mockCId2 = mock(ContainerId.class); - when(mockContainer2.getId()).thenReturn(mockCId2); - when(mockCId2.toString()).thenReturn("container2"); + NodeId host2 = NodeId.newInstance("host2", 2); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(1, 1), 1); + ContainerId mockCId2 = ContainerId.newContainerId(attemptId, 2); + Container mockContainer2 = Container.newInstance(mockCId2, host2, null, mockCapability, mockPriority2, null); containers.add(mockContainer2); scheduler.onContainersAllocated(containers); @@ -1677,7 +1703,7 @@ public void testContainerExpired() throws Exception { statuses.add(mockStatus2); scheduler.onContainersCompleted(statuses); - verify(mockApp, times(0)).taskAllocated(any(), any(), any(Container.class)); + verify(mockApp, times(0)).taskAllocated(any(), any(), any()); verify(mockRMClient, times(3)).addContainerRequest(requestCaptor.capture()); CookieContainerRequest resubmitRequest = requestCaptor.getValue(); assertEquals(request2.getCookie().getTask(), resubmitRequest.getCookie().getTask()); @@ -1690,7 +1716,7 @@ public void testContainerExpired() throws Exception { assertFalse(scheduler.deallocateTask(mockTask2, true, null, null)); scheduler.onContainersAllocated(containers); scheduler.onContainersCompleted(statuses); - verify(mockApp, times(0)).taskAllocated(any(), any(), any(Container.class)); + verify(mockApp, times(0)).taskAllocated(any(), any(), any()); verify(mockRMClient, times(3)).addContainerRequest(requestCaptor.capture()); } @@ -1704,4 +1730,17 @@ private Container createContainer(int id, String host, Resource resource, + ":0", resource, priority, null); return container; } + + static class MockTask { + final String name; + + MockTask(String name) { + this.name = name; + } + + @Override + public String toString() { + return name; + } + } } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java index 2c2452b1f3..1421aa1510 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerHelpers.java @@ -18,8 +18,7 @@ package org.apache.tez.dag.app.rm; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.fail; +import static org.junit.Assert.*; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; @@ -43,8 +42,8 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import 
java.util.concurrent.atomic.AtomicInteger; +import java.util.Objects; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.hadoop.conf.Configuration; @@ -76,7 +75,9 @@ import org.apache.tez.serviceplugins.api.TaskScheduler; import org.apache.tez.serviceplugins.api.TaskSchedulerContext; -class TestTaskSchedulerHelpers { +final class TestTaskSchedulerHelpers { + + private TestTaskSchedulerHelpers() {} // Mocking AMRMClientImpl to make use of getMatchingRequest static class AMRMClientForTest extends AMRMClientImpl { @@ -143,9 +144,8 @@ RegisterApplicationMasterResponse getRegistrationResponse() { static class TaskSchedulerManagerForTest extends TaskSchedulerManager { - private TezAMRMClientAsync amrmClientAsync; - private ContainerSignatureMatcher containerSignatureMatcher; - private UserPayload defaultPayload; + private final TezAMRMClientAsync amrmClientAsync; + private final UserPayload defaultPayload; @SuppressWarnings("rawtypes") public TaskSchedulerManagerForTest(AppContext appContext, @@ -157,7 +157,18 @@ public TaskSchedulerManagerForTest(AppContext appContext, Lists.newArrayList(new NamedEntityDescriptor("FakeScheduler", null)), false, new HadoopShimsLoader(appContext.getAMConf()).getHadoopShim()); this.amrmClientAsync = amrmClientAsync; - this.containerSignatureMatcher = containerSignatureMatcher; + this.defaultPayload = defaultPayload; + } + + TaskSchedulerManagerForTest(AppContext appContext, + EventHandler eventHandler, + TezAMRMClientAsync amrmClientAsync, + ContainerSignatureMatcher containerSignatureMatcher, + UserPayload defaultPayload, + List descriptors) { + super(appContext, null, eventHandler, containerSignatureMatcher, null, descriptors, + false, new HadoopShimsLoader(appContext.getAMConf()).getHadoopShim()); + this.amrmClientAsync = amrmClientAsync; this.defaultPayload = defaultPayload; } @@ -199,7 +210,7 @@ public void serviceStop() { @SuppressWarnings("rawtypes") static class CapturingEventHandler implements EventHandler { - private Queue events = new ConcurrentLinkedQueue(); + private final Queue events = new ConcurrentLinkedQueue(); public void handle(Event event) { events.add(event); @@ -211,7 +222,7 @@ public void reset() { public void verifyNoInvocations(Class eventClass) { for (Event e : events) { - assertFalse(e.getClass().getName().equals(eventClass.getName())); + assertNotEquals(e.getClass().getName(), eventClass.getName()); } } @@ -224,6 +235,10 @@ public Event verifyInvocation(Class eventClass) { fail("Expected Event: " + eventClass.getName() + " not sent"); return null; } + + public int getEventSize() { + return this.events.size(); + } } static class TaskSchedulerWithDrainableContext extends YarnTaskSchedulerService { @@ -245,8 +260,8 @@ public TaskSchedulerContextDrainable getDrainableAppCallback() { static class TaskSchedulerContextDrainable implements TaskSchedulerContext { int completedEvents; int invocations; - private TaskSchedulerContext real; - private CountingExecutorService countingExecutorService; + private final TaskSchedulerContext real; + private final CountingExecutorService countingExecutorService; final AtomicInteger count = new AtomicInteger(0); public TaskSchedulerContextDrainable(TaskSchedulerContextImplWrapper real) { @@ -261,6 +276,20 @@ public void taskAllocated(Object task, Object appCookie, Container container) { real.taskAllocated(task, appCookie, container); } + @Override + public void containerAllocated(Container container) { + 
count.incrementAndGet(); + invocations++; + real.containerAllocated(container); + } + + @Override + public void containerReused(Container container) { + count.incrementAndGet(); + invocations++; + real.containerReused(container); + } + @Override public void containerCompleted(Object taskLastAllocated, ContainerStatus containerStatus) { @@ -365,6 +394,11 @@ public AMState getAMState() { return real.getAMState(); } + @Override + public int getVertexIndexForTask(Object task) { + return real.getVertexIndexForTask(task); + } + @Override public void preemptContainer(ContainerId cId) { invocations++; @@ -387,8 +421,8 @@ static class AlwaysMatchesContainerMatcher implements ContainerSignatureMatcher @Override public boolean isSuperSet(Object cs1, Object cs2) { - Preconditions.checkNotNull(cs1, "Arguments cannot be null"); - Preconditions.checkNotNull(cs2, "Arguments cannot be null"); + Objects.requireNonNull(cs1, "Arguments cannot be null"); + Objects.requireNonNull(cs2, "Arguments cannot be null"); return true; } @@ -412,17 +446,14 @@ public Object union(Object cs1, Object cs2) { static class PreemptionMatcher implements ContainerSignatureMatcher { @Override public boolean isSuperSet(Object cs1, Object cs2) { - Preconditions.checkNotNull(cs1, "Arguments cannot be null"); - Preconditions.checkNotNull(cs2, "Arguments cannot be null"); + Objects.requireNonNull(cs1, "Arguments cannot be null"); + Objects.requireNonNull(cs2, "Arguments cannot be null"); return true; } @Override public boolean isExactMatch(Object cs1, Object cs2) { - if (cs1 == cs2 && cs1 != null) { - return true; - } - return false; + return cs1 == cs2 && cs1 != null; } @Override diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerManager.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerManager.java index 43805f3de8..901a9df71b 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerManager.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/TestTaskSchedulerManager.java @@ -23,10 +23,10 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.eq; import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.spy; @@ -45,11 +45,17 @@ import java.util.List; import java.util.Set; import java.util.concurrent.Executors; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; +import com.google.common.collect.BiMap; +import com.google.common.collect.HashBiMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; @@ -59,6 +65,8 @@ import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.client.api.AMRMClient; +import 
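Note: the matcher hunks above swap Guava's Preconditions.checkNotNull for the JDK's Objects.requireNonNull, which throws the same NullPointerException with the supplied message and drops the Guava dependency from these helpers:

    import java.util.Objects;

    @Override
    public boolean isSuperSet(Object cs1, Object cs2) {
      Objects.requireNonNull(cs1, "Arguments cannot be null");  // NPE with message if null
      Objects.requireNonNull(cs2, "Arguments cannot be null");
      return true;
    }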
org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.tez.common.ContainerSignatureMatcher; @@ -70,11 +78,18 @@ import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.api.UserPayload; import org.apache.tez.dag.api.client.DAGClientServer; +import org.apache.tez.dag.api.oldrecords.TaskAttemptState; import org.apache.tez.dag.app.AppContext; +import org.apache.tez.dag.app.ClusterInfo; import org.apache.tez.dag.app.ContainerContext; +import org.apache.tez.dag.app.ContainerHeartbeatHandler; +import org.apache.tez.dag.app.DAGAppMaster; +import org.apache.tez.dag.app.DAGAppMasterState; import org.apache.tez.dag.app.ServicePluginLifecycleAbstractService; +import org.apache.tez.dag.app.TaskCommunicatorManagerInterface; import org.apache.tez.dag.app.dag.DAG; import org.apache.tez.dag.app.dag.TaskAttempt; +import org.apache.tez.dag.app.dag.event.DAGAppMasterEventSchedulingServiceError; import org.apache.tez.dag.app.dag.event.DAGAppMasterEventType; import org.apache.tez.dag.app.dag.event.DAGAppMasterEventUserServiceFatalError; import org.apache.tez.dag.app.dag.event.DAGEventTerminateDag; @@ -84,9 +99,12 @@ import org.apache.tez.dag.app.rm.container.AMContainer; import org.apache.tez.dag.app.rm.container.AMContainerEventAssignTA; import org.apache.tez.dag.app.rm.container.AMContainerEventCompleted; +import org.apache.tez.dag.app.rm.container.AMContainerEventStopRequest; import org.apache.tez.dag.app.rm.container.AMContainerEventType; import org.apache.tez.dag.app.rm.container.AMContainerMap; import org.apache.tez.dag.app.rm.container.AMContainerState; +import org.apache.tez.dag.app.rm.container.ContainerContextMatcher; +import org.apache.tez.dag.app.rm.node.AMNodeTracker; import org.apache.tez.dag.app.web.WebUIService; import org.apache.tez.dag.helpers.DagInfoImplForTest; import org.apache.tez.dag.records.TaskAttemptTerminationCause; @@ -157,7 +175,7 @@ protected void notifyForTest() { DAGClientServer mockClientService; TestEventHandler mockEventHandler; ContainerSignatureMatcher mockSigMatcher; - MockTaskSchedulerManager schedulerHandler; + MockTaskSchedulerManager taskSchedulerManager; TaskScheduler mockTaskScheduler; AMContainerMap mockAMContainerMap; WebUIService mockWebUIService; @@ -174,20 +192,20 @@ public void setup() { mockWebUIService = mock(WebUIService.class); when(mockAppContext.getAllContainers()).thenReturn(mockAMContainerMap); when(mockClientService.getBindAddress()).thenReturn(new InetSocketAddress(10000)); - schedulerHandler = new MockTaskSchedulerManager( + taskSchedulerManager = new MockTaskSchedulerManager( mockAppContext, mockClientService, mockEventHandler, mockSigMatcher, mockWebUIService); } @Test(timeout = 5000) public void testSimpleAllocate() throws Exception { Configuration conf = new Configuration(false); - schedulerHandler.init(conf); - schedulerHandler.start(); + taskSchedulerManager.init(conf); + taskSchedulerManager.start(); TaskAttemptImpl mockTaskAttempt = mock(TaskAttemptImpl.class); TezTaskAttemptID mockAttemptId = mock(TezTaskAttemptID.class); when(mockAttemptId.getId()).thenReturn(0); - when(mockTaskAttempt.getID()).thenReturn(mockAttemptId); + when(mockTaskAttempt.getTaskAttemptID()).thenReturn(mockAttemptId); Resource resource = Resource.newInstance(1024, 1); ContainerContext containerContext = new ContainerContext(new HashMap(), new Credentials(), @@ -208,20 +226,95 @@ public void testSimpleAllocate() throws Exception { 
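Note: the import hunk above reflects the Mockito upgrade: org.mockito.Matchers is deprecated and later removed, and because org.mockito.Mockito extends org.mockito.ArgumentMatchers, the matcher methods can be statically imported from Mockito directly. The untyped any() likewise replaces the class-literal overloads where the target type is inferable, as in the reportError verification earlier in this diff:

    import static org.mockito.Mockito.any;
    import static org.mockito.Mockito.eq;
    import static org.mockito.Mockito.verify;

    // e.g. any(DagInfo.class) becomes plain any():
    verify(mockApp).reportError(eq(YarnTaskSchedulerServiceError.RESOURCEMANAGER_ERROR),
        argumentCaptor.capture(), any());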
AMSchedulerEventTALaunchRequest lr = new AMSchedulerEventTALaunchRequest(mockAttemptId, resource, null, mockTaskAttempt, locHint, priority, containerContext, 0, 0, 0); - schedulerHandler.taskAllocated(0, mockTaskAttempt, lr, container); + taskSchedulerManager.taskAllocated(0, mockTaskAttempt, lr, container); assertEquals(1, mockEventHandler.events.size()); assertTrue(mockEventHandler.events.get(0) instanceof AMContainerEventAssignTA); AMContainerEventAssignTA assignEvent = (AMContainerEventAssignTA) mockEventHandler.events.get(0); assertEquals(priority, assignEvent.getPriority()); assertEquals(mockAttemptId, assignEvent.getTaskAttemptId()); + + verify(mockAppContext.getCurrentDAG()).addUsedContainer(any(Container.class)); // called on taskAllocated + } + + @Test(timeout = 5000) + public void testTASucceededAfterContainerCleanup() throws Exception { + Configuration conf = new Configuration(false); + taskSchedulerManager.init(conf); + taskSchedulerManager.start(); + + TaskAttemptImpl mockTaskAttempt = mock(TaskAttemptImpl.class); + TezTaskAttemptID mockAttemptId = mock(TezTaskAttemptID.class); + when(mockAttemptId.getId()).thenReturn(0); + when(mockTaskAttempt.getTaskAttemptID()).thenReturn(mockAttemptId); + Resource resource = Resource.newInstance(1024, 1); + ContainerContext containerContext = + new ContainerContext(new HashMap(), new Credentials(), + new HashMap(), ""); + int priority = 10; + TaskLocationHint locHint = TaskLocationHint.createTaskLocationHint(new HashSet(), null); + + ContainerId mockCId = mock(ContainerId.class); + Container container = mock(Container.class); + when(container.getId()).thenReturn(mockCId); + + AMContainer mockAMContainer = mock(AMContainer.class); + when(mockAMContainer.getContainerId()).thenReturn(mockCId); + when(mockAMContainer.getState()).thenReturn(AMContainerState.IDLE); + + // Returning null container will replicate container cleanup scenario + when(mockAMContainerMap.get(mockCId)).thenReturn(null); + + AMSchedulerEventTALaunchRequest lr = + new AMSchedulerEventTALaunchRequest(mockAttemptId, resource, null, mockTaskAttempt, locHint, + priority, containerContext, 0, 0, 0); + taskSchedulerManager.taskAllocated(0, mockTaskAttempt, lr, container); + assertEquals(1, mockEventHandler.events.size()); + assertTrue(mockEventHandler.events.get(0) instanceof AMContainerEventAssignTA); + AMContainerEventAssignTA assignEvent = + (AMContainerEventAssignTA) mockEventHandler.events.get(0); + assertEquals(priority, assignEvent.getPriority()); + assertEquals(mockAttemptId, assignEvent.getTaskAttemptId()); + } + + @Test(timeout = 5000) + public void testTAUnsuccessfulAfterContainerCleanup() throws Exception { + Configuration conf = new Configuration(false); + taskSchedulerManager.init(conf); + taskSchedulerManager.start(); + + TaskAttemptImpl mockTaskAttempt = mock(TaskAttemptImpl.class); + TezTaskAttemptID mockAttemptId = mock(TezTaskAttemptID.class); + when(mockAttemptId.getId()).thenReturn(0); + when(mockTaskAttempt.getTaskAttemptID()).thenReturn(mockAttemptId); + + ContainerId mockCId = mock(ContainerId.class); + Container container = mock(Container.class); + when(container.getId()).thenReturn(mockCId); + + AMContainer mockAMContainer = mock(AMContainer.class); + when(mockAMContainer.getContainerId()).thenReturn(mockCId); + when(mockAMContainer.getState()).thenReturn(AMContainerState.IDLE); + when(mockTaskAttempt.getAssignedContainerID()).thenReturn(mockCId); + + // Returning null container will replicate container cleanup scenario + 
when(mockAMContainerMap.get(mockCId)).thenReturn(null); + + taskSchedulerManager.handleEvent( + new AMSchedulerEventTAEnded( + mockTaskAttempt, mockCId, TaskAttemptState.KILLED, null, null, 0)); + assertEquals(1, mockEventHandler.events.size()); + assertTrue(mockEventHandler.events.get(0) instanceof AMContainerEventStopRequest); + AMContainerEventStopRequest stopEvent = + (AMContainerEventStopRequest) mockEventHandler.events.get(0); + assertEquals(mockCId, stopEvent.getContainerId()); } @Test (timeout = 5000) public void testTaskBasedAffinity() throws Exception { Configuration conf = new Configuration(false); - schedulerHandler.init(conf); - schedulerHandler.start(); + taskSchedulerManager.init(conf); + taskSchedulerManager.start(); TaskAttemptImpl mockTaskAttempt = mock(TaskAttemptImpl.class); TezTaskAttemptID taId = mock(TezTaskAttemptID.class); @@ -240,11 +333,11 @@ public void testTaskBasedAffinity() throws Exception { Resource resource = Resource.newInstance(100, 1); AMSchedulerEventTALaunchRequest event = new AMSchedulerEventTALaunchRequest (taId, resource, null, mockTaskAttempt, locHint, 3, null, 0, 0, 0); - schedulerHandler.notify.set(false); - schedulerHandler.handle(event); - synchronized (schedulerHandler.notify) { - while (!schedulerHandler.notify.get()) { - schedulerHandler.notify.wait(); + taskSchedulerManager.notify.set(false); + taskSchedulerManager.handle(event); + synchronized (taskSchedulerManager.notify) { + while (!taskSchedulerManager.notify.get()) { + taskSchedulerManager.notify.wait(); } } @@ -252,15 +345,15 @@ public void testTaskBasedAffinity() throws Exception { verify(mockTaskScheduler, times(1)).allocateTask(mockTaskAttempt, resource, affCId, Priority.newInstance(3), null, event); - schedulerHandler.stop(); - schedulerHandler.close(); + taskSchedulerManager.stop(); + taskSchedulerManager.close(); } @Test (timeout = 5000) public void testContainerPreempted() throws IOException { Configuration conf = new Configuration(false); - schedulerHandler.init(conf); - schedulerHandler.start(); + taskSchedulerManager.init(conf); + taskSchedulerManager.start(); String diagnostics = "Container preempted by RM."; TaskAttemptImpl mockTask = mock(TaskAttemptImpl.class); @@ -272,7 +365,7 @@ public void testContainerPreempted() throws IOException { when(mockStatus.getContainerId()).thenReturn(mockCId); when(mockStatus.getDiagnostics()).thenReturn(diagnostics); when(mockStatus.getExitStatus()).thenReturn(ContainerExitStatus.PREEMPTED); - schedulerHandler.containerCompleted(0, mockTask, mockStatus); + taskSchedulerManager.containerCompleted(0, mockTask, mockStatus); assertEquals(1, mockEventHandler.events.size()); Event event = mockEventHandler.events.get(0); assertEquals(AMContainerEventType.C_COMPLETED, event.getType()); @@ -285,15 +378,15 @@ public void testContainerPreempted() throws IOException { completedEvent.getTerminationCause()); Assert.assertFalse(completedEvent.isDiskFailed()); - schedulerHandler.stop(); - schedulerHandler.close(); + taskSchedulerManager.stop(); + taskSchedulerManager.close(); } - + @Test (timeout = 5000) public void testContainerInternalPreempted() throws IOException, ServicePluginException { Configuration conf = new Configuration(false); - schedulerHandler.init(conf); - schedulerHandler.start(); + taskSchedulerManager.init(conf); + taskSchedulerManager.start(); AMContainer mockAmContainer = mock(AMContainer.class); when(mockAmContainer.getTaskSchedulerIdentifier()).thenReturn(0); @@ -302,7 +395,7 @@ public void testContainerInternalPreempted() throws 
IOException, ServicePluginEx ContainerId mockCId = mock(ContainerId.class); verify(mockTaskScheduler, times(0)).deallocateContainer((ContainerId) any()); when(mockAMContainerMap.get(mockCId)).thenReturn(mockAmContainer); - schedulerHandler.preemptContainer(0, mockCId); + taskSchedulerManager.preemptContainer(0, mockCId); verify(mockTaskScheduler, times(1)).deallocateContainer(mockCId); assertEquals(1, mockEventHandler.events.size()); Event event = mockEventHandler.events.get(0); @@ -315,15 +408,46 @@ public void testContainerInternalPreempted() throws IOException, ServicePluginEx assertEquals(TaskAttemptTerminationCause.INTERNAL_PREEMPTION, completedEvent.getTerminationCause()); - schedulerHandler.stop(); - schedulerHandler.close(); + taskSchedulerManager.stop(); + taskSchedulerManager.close(); + } + + @Test(timeout = 5000) + public void testContainerInternalPreemptedAfterContainerCleanup() throws IOException, ServicePluginException { + Configuration conf = new Configuration(false); + taskSchedulerManager.init(conf); + taskSchedulerManager.start(); + + AMContainer mockAmContainer = mock(AMContainer.class); + when(mockAmContainer.getTaskSchedulerIdentifier()).thenReturn(0); + when(mockAmContainer.getContainerLauncherIdentifier()).thenReturn(0); + when(mockAmContainer.getTaskCommunicatorIdentifier()).thenReturn(0); + ContainerId mockCId = mock(ContainerId.class); + verify(mockTaskScheduler, times(0)).deallocateContainer((ContainerId) any()); + // Returning null container will replicate container cleanup scenario + when(mockAMContainerMap.get(mockCId)).thenReturn(null); + taskSchedulerManager.preemptContainer(0, mockCId); + verify(mockTaskScheduler, times(0)).deallocateContainer(mockCId); + assertEquals(1, mockEventHandler.events.size()); + Event event = mockEventHandler.events.get(0); + assertEquals(AMContainerEventType.C_COMPLETED, event.getType()); + AMContainerEventCompleted completedEvent = (AMContainerEventCompleted) event; + assertEquals(mockCId, completedEvent.getContainerId()); + assertEquals("Container preempted internally", completedEvent.getDiagnostics()); + assertTrue(completedEvent.isPreempted()); + Assert.assertFalse(completedEvent.isDiskFailed()); + assertEquals(TaskAttemptTerminationCause.INTERNAL_PREEMPTION, + completedEvent.getTerminationCause()); + + taskSchedulerManager.stop(); + taskSchedulerManager.close(); } @Test (timeout = 5000) public void testContainerDiskFailed() throws IOException { Configuration conf = new Configuration(false); - schedulerHandler.init(conf); - schedulerHandler.start(); + taskSchedulerManager.init(conf); + taskSchedulerManager.start(); String diagnostics = "NM disk failed."; TaskAttemptImpl mockTask = mock(TaskAttemptImpl.class); @@ -335,7 +459,7 @@ public void testContainerDiskFailed() throws IOException { when(mockStatus.getContainerId()).thenReturn(mockCId); when(mockStatus.getDiagnostics()).thenReturn(diagnostics); when(mockStatus.getExitStatus()).thenReturn(ContainerExitStatus.DISKS_FAILED); - schedulerHandler.containerCompleted(0, mockTask, mockStatus); + taskSchedulerManager.containerCompleted(0, mockTask, mockStatus); assertEquals(1, mockEventHandler.events.size()); Event event = mockEventHandler.events.get(0); assertEquals(AMContainerEventType.C_COMPLETED, event.getType()); @@ -348,15 +472,15 @@ public void testContainerDiskFailed() throws IOException { assertEquals(TaskAttemptTerminationCause.NODE_DISK_ERROR, completedEvent.getTerminationCause()); - schedulerHandler.stop(); - schedulerHandler.close(); + taskSchedulerManager.stop(); + 
taskSchedulerManager.close(); } @Test (timeout = 5000) public void testContainerExceededPMem() throws IOException { Configuration conf = new Configuration(false); - schedulerHandler.init(conf); - schedulerHandler.start(); + taskSchedulerManager.init(conf); + taskSchedulerManager.start(); String diagnostics = "Exceeded Physical Memory"; TaskAttemptImpl mockTask = mock(TaskAttemptImpl.class); @@ -370,7 +494,7 @@ public void testContainerExceededPMem() throws IOException { // use -104 rather than ContainerExitStatus.KILLED_EXCEEDED_PMEM because // ContainerExitStatus.KILLED_EXCEEDED_PMEM is only available after hadoop-2.5 when(mockStatus.getExitStatus()).thenReturn(-104); - schedulerHandler.containerCompleted(0, mockTask, mockStatus); + taskSchedulerManager.containerCompleted(0, mockTask, mockStatus); assertEquals(1, mockEventHandler.events.size()); Event event = mockEventHandler.events.get(0); assertEquals(AMContainerEventType.C_COMPLETED, event.getType()); @@ -383,13 +507,13 @@ public void testContainerExceededPMem() throws IOException { assertEquals(TaskAttemptTerminationCause.CONTAINER_EXITED, completedEvent.getTerminationCause()); - schedulerHandler.stop(); - schedulerHandler.close(); + taskSchedulerManager.stop(); + taskSchedulerManager.close(); } @Test (timeout = 5000) public void testHistoryUrlConf() throws Exception { - Configuration conf = schedulerHandler.appContext.getAMConf(); + Configuration conf = taskSchedulerManager.appContext.getAMConf(); final ApplicationId mockApplicationId = mock(ApplicationId.class); doReturn("TEST_APP_ID").when(mockApplicationId).toString(); doReturn(mockApplicationId).when(mockAppContext).getApplicationID(); @@ -397,35 +521,35 @@ public void testHistoryUrlConf() throws Exception { // ensure history url is empty when timeline server is not the logging class conf.set(TezConfiguration.TEZ_HISTORY_URL_BASE, "http://ui-host:9999"); assertEquals("http://ui-host:9999/#/tez-app/TEST_APP_ID", - schedulerHandler.getHistoryUrl()); + taskSchedulerManager.getHistoryUrl()); // ensure the trailing / in history url is handled conf.set(TezConfiguration.TEZ_HISTORY_URL_BASE, "http://ui-host:9998/"); assertEquals("http://ui-host:9998/#/tez-app/TEST_APP_ID", - schedulerHandler.getHistoryUrl()); + taskSchedulerManager.getHistoryUrl()); // ensure missing scheme in history url is handled conf.set(TezConfiguration.TEZ_HISTORY_URL_BASE, "ui-host:9998/"); assertEquals("http://ui-host:9998/#/tez-app/TEST_APP_ID", - schedulerHandler.getHistoryUrl()); + taskSchedulerManager.getHistoryUrl()); // handle bad template ex without begining / conf.set(TezConfiguration.TEZ_AM_TEZ_UI_HISTORY_URL_TEMPLATE, "__HISTORY_URL_BASE__#/somepath"); assertEquals("http://ui-host:9998/#/somepath", - schedulerHandler.getHistoryUrl()); + taskSchedulerManager.getHistoryUrl()); conf.set(TezConfiguration.TEZ_AM_TEZ_UI_HISTORY_URL_TEMPLATE, "__HISTORY_URL_BASE__?viewPath=tez-app/__APPLICATION_ID__"); conf.set(TezConfiguration.TEZ_HISTORY_URL_BASE, "http://localhost/ui/tez"); assertEquals("http://localhost/ui/tez?viewPath=tez-app/TEST_APP_ID", - schedulerHandler.getHistoryUrl()); + taskSchedulerManager.getHistoryUrl()); } @Test (timeout = 5000) public void testHistoryUrlWithoutScheme() throws Exception { - Configuration conf = schedulerHandler.appContext.getAMConf(); + Configuration conf = taskSchedulerManager.appContext.getAMConf(); final ApplicationId mockApplicationId = mock(ApplicationId.class); doReturn("TEST_APP_ID").when(mockApplicationId).toString(); 
doReturn(mockApplicationId).when(mockAppContext).getApplicationID(); @@ -433,16 +557,16 @@ public void testHistoryUrlWithoutScheme() throws Exception { conf.set(TezConfiguration.TEZ_HISTORY_URL_BASE, "/foo/bar/"); conf.setBoolean(TezConfiguration.TEZ_AM_UI_HISTORY_URL_SCHEME_CHECK_ENABLED, false); assertEquals("/foo/bar/#/tez-app/TEST_APP_ID", - schedulerHandler.getHistoryUrl()); + taskSchedulerManager.getHistoryUrl()); conf.set(TezConfiguration.TEZ_HISTORY_URL_BASE, "ui-host:9998/foo/bar/"); assertEquals("ui-host:9998/foo/bar/#/tez-app/TEST_APP_ID", - schedulerHandler.getHistoryUrl()); + taskSchedulerManager.getHistoryUrl()); conf.setBoolean(TezConfiguration.TEZ_AM_UI_HISTORY_URL_SCHEME_CHECK_ENABLED, true); conf.set(TezConfiguration.TEZ_HISTORY_URL_BASE, "ui-host:9998/foo/bar/"); assertEquals("http://ui-host:9998/foo/bar/#/tez-app/TEST_APP_ID", - schedulerHandler.getHistoryUrl()); + taskSchedulerManager.getHistoryUrl()); } @Test(timeout = 5000) @@ -555,7 +679,7 @@ mockTaskAttempt1, mock(TaskLocationHint.class), 1, mock(ContainerContext.class), tseh.handle(launchRequest1); verify(tseh.getTestTaskScheduler(0)).allocateTask(eq(mockTaskAttempt1), eq(resource), - any(String[].class), any(String[].class), any(Priority.class), any(Object.class), + any(String[].class), any(String[].class), any(), any(), eq(launchRequest1)); AMSchedulerEventTALaunchRequest launchRequest2 = @@ -564,10 +688,27 @@ mockTaskAttempt2, mock(TaskLocationHint.class), 1, mock(ContainerContext.class), 0); tseh.handle(launchRequest2); verify(tseh.getTestTaskScheduler(1)).allocateTask(eq(mockTaskAttempt2), eq(resource), - any(String[].class), any(String[].class), any(Priority.class), any(Object.class), + any(String[].class), any(String[].class), any(), any(), eq(launchRequest2)); } + @SuppressWarnings("unchecked") + @Test(timeout = 5000) + public void testShutdownBeforeStartTaskScheduler() { + Configuration conf = new TezConfiguration(); + AppContext appContext = mock(AppContext.class, RETURNS_DEEP_STUBS); + doReturn(conf).when(appContext).getAMConf(); + + List list = new LinkedList<>(); + list.add(null); + + TaskSchedulerManager taskSchedulerManager = + new TaskSchedulerManager(appContext, null, null, + null, null, list, false,null); + assertFalse("Should not return true unless actually unregistered successfully", + taskSchedulerManager.hasUnregistered()); + } + @SuppressWarnings("unchecked") @Test(timeout = 5000) public void testReportFailureFromTaskScheduler() { @@ -716,6 +857,84 @@ protected void instantiateSchedulers(String host, int port, String trackingUrl, } } + @Test(timeout = 10000) + public void testHandleException() throws Exception { + Configuration tezConf = new Configuration(new YarnConfiguration()); + UserPayload defaultPayload = TezUtils.createUserPayloadFromConf(tezConf); + + // Parse plugins + List tsDescriptors = Lists.newLinkedList(); + BiMap tsMap = HashBiMap.create(); + DAGAppMaster.parseAllPlugins(tsDescriptors, tsMap, Lists.newLinkedList(), HashBiMap.create(), Lists.newLinkedList(), + HashBiMap.create(), null, false, defaultPayload); + + // Only TezYarn found. 
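+ // (parseAllPlugins above was given empty plugin lists and no plugin descriptor proto,
+ // so only the default TezYarn service plugin descriptor is expected to be registered.)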
+ Assert.assertEquals(1, tsDescriptors.size()); + Assert.assertEquals(TezConstants.getTezYarnServicePluginName(), tsDescriptors.get(0).getEntityName()); + + // Construct eventHandler + TestTaskSchedulerHelpers.CapturingEventHandler eventHandler = new TestTaskSchedulerHelpers.CapturingEventHandler(); + TezDAGID dagID = TezDAGID.getInstance("0", 0, 0); + + // Construct AMRMClient + AMRMClient rmClientCore = + new TestTaskSchedulerHelpers.AMRMClientForTest(); + TezAMRMClientAsync rmClient = + spy(new TestTaskSchedulerHelpers.AMRMClientAsyncForTest(rmClientCore, 100)); + + // Construct appContext + AppContext appContext = mock(AppContext.class); + doReturn(new Configuration(false)).when(appContext).getAMConf(); + AMContainerMap amContainerMap = new AMContainerMap(mock(ContainerHeartbeatHandler.class), + mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appContext); + AMNodeTracker amNodeTracker = new AMNodeTracker(eventHandler, appContext); + doReturn(amContainerMap).when(appContext).getAllContainers(); + doReturn(amNodeTracker).when(appContext).getNodeTracker(); + doReturn(DAGAppMasterState.RUNNING).when(appContext).getAMState(); + doReturn(dagID).when(appContext).getCurrentDAGID(); + doReturn(mock(ClusterInfo.class)).when(appContext).getClusterInfo(); + + // Construct TaskSchedulerManager + TaskSchedulerManager taskSchedulerManagerReal = + new TestTaskSchedulerHelpers.TaskSchedulerManagerForTest(appContext, eventHandler, rmClient, + new TestTaskSchedulerHelpers.AlwaysMatchesContainerMatcher(), defaultPayload, tsDescriptors); + TaskSchedulerManager taskSchedulerManager = spy(taskSchedulerManagerReal); + taskSchedulerManager.init(tezConf); + taskSchedulerManager.start(); + + // Send an error to the scheduler, then expect a DAGAppMasterEventSchedulingServiceError event.
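+ // (TaskSchedulerManager is expected to wrap a TaskScheduler#onError callback into a
+ // DAGAppMasterEventSchedulingServiceError, which the CapturingEventHandler records below.)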
+ YarnTaskSchedulerService scheduler = ((YarnTaskSchedulerService) taskSchedulerManager.getTaskScheduler(0)); + scheduler.onError(new Exception("Triggered by unit test")); + waitFor(() -> { + return eventHandler.getEventSize() > 0; + }, 1000, 5000); + eventHandler.verifyInvocation(DAGAppMasterEventSchedulingServiceError.class); + } + + @Test + public void testTaskSchedulerManagerHeldContainers() throws IOException { + Configuration conf = new Configuration(false); + UserPayload defaultPayload = TezUtils.createUserPayloadFromConf(conf); + + String customSchedulerName = "fakeScheduler"; + List taskSchedulers = new LinkedList<>(); + UserPayload userPayload = UserPayload.create(ByteBuffer.allocate(4)); + taskSchedulers.add( + new NamedEntityDescriptor(customSchedulerName, FakeTaskScheduler.class.getName()).setUserPayload(userPayload)); + taskSchedulers.add( + new NamedEntityDescriptor(TezConstants.getTezYarnServicePluginName(), null).setUserPayload(defaultPayload)); + + TSEHForMultipleSchedulersTest tseh = new TSEHForMultipleSchedulersTest(mockAppContext, mockClientService, + mockEventHandler, mockSigMatcher, mockWebUIService, taskSchedulers, false); + + tseh.init(conf); + tseh.start(); + + Assert.assertEquals(TSEHForMultipleSchedulersTest.YARN_TASK_SCHEDULER_HELD_CONTAINERS + + TSEHForMultipleSchedulersTest.CUSTOM_TASK_SCHEDULER_HELD_CONTAINERS, tseh.getHeldContainersCount()); + tseh.close(); + } + private static class ExceptionAnswer implements Answer { @Override public Object answer(InvocationOnMock invocation) throws Throwable { @@ -732,6 +951,8 @@ public Object answer(InvocationOnMock invocation) throws Throwable { public static class TSEHForMultipleSchedulersTest extends TaskSchedulerManager { + public static final Integer YARN_TASK_SCHEDULER_HELD_CONTAINERS = 3; + public static final Integer CUSTOM_TASK_SCHEDULER_HELD_CONTAINERS = 2; private final TaskScheduler yarnTaskScheduler; private final TaskScheduler uberTaskScheduler; private final AtomicBoolean uberSchedulerCreated = new AtomicBoolean(false); @@ -782,14 +1003,15 @@ TaskScheduler createYarnTaskScheduler(TaskSchedulerContext taskSchedulerContext, taskSchedulerContexts.add(taskSchedulerContext); testTaskSchedulers.add(yarnTaskScheduler); yarnSchedulerCreated.set(true); + when(yarnTaskScheduler.getHeldContainersCount()).thenReturn(YARN_TASK_SCHEDULER_HELD_CONTAINERS); return yarnTaskScheduler; } @Override TaskScheduler createUberTaskScheduler(TaskSchedulerContext taskSchedulerContext, int schedulerId) { taskSchedulerContexts.add(taskSchedulerContext); + testTaskSchedulers.add(uberTaskScheduler); uberSchedulerCreated.set(true); - testTaskSchedulers.add(yarnTaskScheduler); return uberTaskScheduler; } @@ -800,6 +1022,7 @@ TaskScheduler createCustomTaskScheduler(TaskSchedulerContex taskSchedulerContexts.add(taskSchedulerContext); TaskScheduler taskScheduler = spy(super.createCustomTaskScheduler(taskSchedulerContext, taskSchedulerDescriptor, schedulerId)); testTaskSchedulers.add(taskScheduler); + when(taskScheduler.getHeldContainersCount()).thenReturn(CUSTOM_TASK_SCHEDULER_HELD_CONTAINERS); return taskScheduler; } @@ -904,6 +1127,11 @@ public void setShouldUnregister() { public boolean hasUnregistered() { return false; } + + @Override + public int getHeldContainersCount() { + return 0; + } } private static final String DAG_NAME = "dagName"; @@ -983,5 +1211,29 @@ public boolean hasUnregistered() throws ServicePluginException { @Override public void dagComplete() throws ServicePluginException { } + + @Override + public
int getHeldContainersCount() { + return 0; + } + } + + public static void waitFor(Supplier<Boolean> check, int checkEveryMillis, + int waitForMillis) throws TimeoutException, InterruptedException { + Preconditions.checkNotNull(check, "Input supplier interface should be initialized"); + Preconditions.checkArgument(waitForMillis >= checkEveryMillis, + "Total wait time should be greater than check interval time"); + + long st = Time.monotonicNow(); + boolean result = check.get(); + + while (!result && (Time.monotonicNow() - st < waitForMillis)) { + Thread.sleep(checkEveryMillis); + result = check.get(); + } + + if (!result) { + throw new TimeoutException("Timed out waiting for condition."); + } } } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java index d3614d9ff2..3061ceaa88 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/container/TestAMContainer.java @@ -23,10 +23,10 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.eq; -import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.reset; @@ -1311,7 +1311,7 @@ protected void mockDAGID() { */ @SuppressWarnings("unchecked") public void verifyNoOutgoingEvents() { - verify(eventHandler, never()).handle(any(Event.class)); + verify(eventHandler, never()).handle(any()); } /** diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/node/TestAMNodeTracker.java b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/node/TestAMNodeTracker.java index 11d3b7ac21..060cdc4dcd 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/rm/node/TestAMNodeTracker.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/rm/node/TestAMNodeTracker.java @@ -327,6 +327,33 @@ public void testNodeSelfBlacklistAlternateScheduler2() { } } + @Test(timeout=10000) + public void testMultipleAMNodeIDs() { + AppContext appContext = mock(AppContext.class); + Configuration conf = new Configuration(false); + conf.setInt(TezConfiguration.TEZ_AM_MAX_TASK_FAILURES_PER_NODE, 2); + TestEventHandler handler = new TestEventHandler(); + AMNodeTracker amNodeTracker = new AMNodeTracker(handler, appContext); + doReturn(amNodeTracker).when(appContext).getNodeTracker(); + AMContainerMap amContainerMap = mock(AMContainerMap.class); + TaskSchedulerManager taskSchedulerManager = + mock(TaskSchedulerManager.class); + dispatcher.register(AMNodeEventType.class, amNodeTracker); + dispatcher.register(AMContainerEventType.class, amContainerMap); + dispatcher.register(AMSchedulerEventType.class, taskSchedulerManager); + amNodeTracker.init(conf); + amNodeTracker.start(); + try { + amNodeTracker.nodeSeen(new ExtendedNodeId(NodeId.newInstance("host", 2222), "uuid1"), 0); + amNodeTracker.nodeSeen(new ExtendedNodeId(NodeId.newInstance("host", 2222), "uuid1"), 0); + amNodeTracker.nodeSeen(new ExtendedNodeId(NodeId.newInstance("host", 2222), "uuid2"), 0); + amNodeTracker.nodeSeen(new ExtendedNodeId(NodeId.newInstance("host", 2222), "uuid2"), 0); + assertEquals(2,
amNodeTracker.getNumNodes(0)); + } finally { + amNodeTracker.stop(); + } + } + @Test(timeout = 10000L) public void testNodeCompletedAndCleanup() { AppContext appContext = mock(AppContext.class); @@ -401,15 +428,26 @@ public void testNodeCompletedAndCleanup() { @Test(timeout=10000) public void testNodeUnhealthyRescheduleTasksEnabled() throws Exception { - _testNodeUnhealthyRescheduleTasks(true); + _testNodeUnhealthyRescheduleTasks(true, false); } @Test(timeout=10000) public void testNodeUnhealthyRescheduleTasksDisabled() throws Exception { - _testNodeUnhealthyRescheduleTasks(false); + _testNodeUnhealthyRescheduleTasks(false, false); + } + + + @Test(timeout=10000) + public void testNodeUnhealthyRescheduleTasksEnabledAMNode() throws Exception { + _testNodeUnhealthyRescheduleTasks(true, true); + } + + @Test(timeout=10000) + public void testNodeUnhealthyRescheduleTasksDisabledAMNode() throws Exception { + _testNodeUnhealthyRescheduleTasks(false, true); } - private void _testNodeUnhealthyRescheduleTasks(boolean rescheduleTasks) { + private void _testNodeUnhealthyRescheduleTasks(boolean rescheduleTasks, boolean useExtendedNodeId) { AppContext appContext = mock(AppContext.class); Configuration conf = new Configuration(false); conf.setBoolean(TezConfiguration.TEZ_AM_NODE_UNHEALTHY_RESCHEDULE_TASKS, @@ -422,8 +460,14 @@ private void _testNodeUnhealthyRescheduleTasks(boolean rescheduleTasks) { // add a node amNodeTracker.handle(new AMNodeEventNodeCountUpdated(1, 0)); - NodeId nodeId = NodeId.newInstance("host1", 1234); - amNodeTracker.nodeSeen(nodeId, 0); + NodeId nodeId; + if (useExtendedNodeId) { + nodeId = new ExtendedNodeId(NodeId.newInstance("host1", 1234), "uuid2"); + amNodeTracker.nodeSeen(nodeId, 0); + } else { + nodeId = NodeId.newInstance("host1", 1234); + amNodeTracker.nodeSeen(nodeId, 0); + } AMNodeImpl node = (AMNodeImpl) amNodeTracker.get(nodeId, 0); // simulate task starting on node diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/web/TestAMWebController.java b/tez-dag/src/test/java/org/apache/tez/dag/app/web/TestAMWebController.java index 16b391b55f..ced9ca0abe 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/web/TestAMWebController.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/web/TestAMWebController.java @@ -18,11 +18,11 @@ package org.apache.tez.dag.app.web; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyString; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.spy; @@ -640,7 +640,7 @@ Map getTasksTestHelper(List tasks, List > t //Creating mock tasks and attaching to mock vertex Map taskMap = Maps.newHashMap(); for(Task task : tasks) { - TezTaskID taskId = task.getTaskId(); + TezTaskID taskId = task.getTaskID(); int taskIndex = taskId.getId(); doReturn(task).when(mockVertex).getTask(taskIndex); taskMap.put(taskId, task); @@ -688,7 +688,7 @@ private List createMockTasks() { private Task createMockTask(String taskIDStr, TaskState status, float progress) { Task mockTask = mock(Task.class); - doReturn(TezTaskID.fromString(taskIDStr)).when(mockTask).getTaskId(); + doReturn(TezTaskID.fromString(taskIDStr)).when(mockTask).getTaskID(); doReturn(status).when(mockTask).getState(); 
doReturn(progress).when(mockTask).getProgress(); @@ -711,7 +711,7 @@ private Task createMockTask(String taskIDStr, TaskState status, float progress) private void verifySingleTaskResult(Task mockTask, Map taskResult) { Assert.assertEquals(3, taskResult.size()); - Assert.assertEquals(mockTask.getTaskId().toString(), taskResult.get("id")); + Assert.assertEquals(mockTask.getTaskID().toString(), taskResult.get("id")); Assert.assertEquals(mockTask.getState().toString(), taskResult.get("status")); Assert.assertEquals(Float.toString(mockTask.getProgress()), taskResult.get("progress")); } @@ -777,7 +777,7 @@ Map getAttemptsTestHelper(List attempts, List getAttemptsTestHelper(List attempts, List attemptsMap = Maps.newHashMap(); for(TaskAttempt attempt : attempts) { - TezTaskAttemptID attemptId = attempt.getID(); + TezTaskAttemptID attemptId = attempt.getTaskAttemptID(); doReturn(attempt).when(mockTask).getAttempt(attemptId); attemptsMap.put(attemptId, attempt); } @@ -835,7 +835,7 @@ private List createMockAttempts() { private TaskAttempt createMockAttempt(String attemptIDStr, TaskAttemptState status, float progress) { TaskAttempt mockAttempt = mock(TaskAttempt.class); - doReturn(TezTaskAttemptID.fromString(attemptIDStr)).when(mockAttempt).getID(); + doReturn(TezTaskAttemptID.fromString(attemptIDStr)).when(mockAttempt).getTaskAttemptID(); doReturn(status).when(mockAttempt).getState(); doReturn(progress).when(mockAttempt).getProgress(); @@ -858,7 +858,7 @@ private TaskAttempt createMockAttempt(String attemptIDStr, TaskAttemptState stat private void verifySingleAttemptResult(TaskAttempt mockTask, Map taskResult) { Assert.assertEquals(3, taskResult.size()); - Assert.assertEquals(mockTask.getID().toString(), taskResult.get("id")); + Assert.assertEquals(mockTask.getTaskAttemptID().toString(), taskResult.get("id")); Assert.assertEquals(mockTask.getState().toString(), taskResult.get("status")); Assert.assertEquals(Float.toString(mockTask.getProgress()), taskResult.get("progress")); } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/helpers/DagInfoImplForTest.java b/tez-dag/src/test/java/org/apache/tez/dag/helpers/DagInfoImplForTest.java index b0eeaef933..26a1a0be7b 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/helpers/DagInfoImplForTest.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/helpers/DagInfoImplForTest.java @@ -14,9 +14,12 @@ package org.apache.tez.dag.helpers; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.Credentials; import org.apache.tez.serviceplugins.api.DagInfo; +import java.util.BitSet; + public class DagInfoImplForTest implements DagInfo { private final int index; @@ -41,4 +44,19 @@ public String getName() { public Credentials getCredentials() { return null; } + + @Override + public int getTotalVertices() { + return 0; + } + + @Override + public BitSet getVertexDescendants(int vertexIndex) { + return null; + } + + @Override + public Configuration getConf() { + return null; + } } diff --git a/tez-dag/src/test/java/org/apache/tez/dag/history/events/TestHistoryEventsProtoConversion.java b/tez-dag/src/test/java/org/apache/tez/dag/history/events/TestHistoryEventsProtoConversion.java index 47d8389e34..51b4bf9f92 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/history/events/TestHistoryEventsProtoConversion.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/history/events/TestHistoryEventsProtoConversion.java @@ -23,6 +23,8 @@ import java.nio.ByteBuffer; +import com.google.protobuf.CodedInputStream; +import 
com.google.protobuf.CodedOutputStream; import org.apache.tez.runtime.api.TaskFailureType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -89,7 +91,9 @@ public class TestHistoryEventsProtoConversion { private HistoryEvent testProtoConversion(HistoryEvent event) throws IOException, TezException { ByteArrayOutputStream os = new ByteArrayOutputStream(); HistoryEvent deserializedEvent = null; - event.toProtoStream(os); + CodedOutputStream codedOutputStream = CodedOutputStream.newInstance(os); + event.toProtoStream(codedOutputStream); + codedOutputStream.flush(); os.flush(); os.close(); deserializedEvent = ReflectionUtils.createClazzInstance( @@ -98,7 +102,7 @@ private HistoryEvent testProtoConversion(HistoryEvent event) throws IOException, + ", eventType=" + event.getEventType() + ", bufLen=" + os.toByteArray().length); deserializedEvent.fromProtoStream( - new ByteArrayInputStream(os.toByteArray())); + CodedInputStream.newInstance(os.toByteArray())); return deserializedEvent; } @@ -182,8 +186,8 @@ private void testDAGSubmittedEvent() throws Exception { testProtoConversion(event); Assert.assertEquals(event.getApplicationAttemptId(), deserializedEvent.getApplicationAttemptId()); - Assert.assertEquals(event.getDagID(), - deserializedEvent.getDagID()); + Assert.assertEquals(event.getDAGID(), + deserializedEvent.getDAGID()); Assert.assertEquals(event.getDAGName(), deserializedEvent.getDAGName()); Assert.assertEquals(event.getSubmitTime(), @@ -200,8 +204,8 @@ private void testDAGInitializedEvent() throws Exception { "user", "dagName", null); DAGInitializedEvent deserializedEvent = (DAGInitializedEvent) testProtoConversion(event); - Assert.assertEquals(event.getDagID(), - deserializedEvent.getDagID()); + Assert.assertEquals(event.getDAGID(), + deserializedEvent.getDAGID()); Assert.assertEquals(event.getInitTime(), deserializedEvent.getInitTime()); logEvents(event, deserializedEvent); } @@ -212,8 +216,8 @@ private void testDAGStartedEvent() throws Exception { "user", "dagName"); DAGStartedEvent deserializedEvent = (DAGStartedEvent) testProtoConversion(event); - Assert.assertEquals(event.getDagID(), - deserializedEvent.getDagID()); + Assert.assertEquals(event.getDAGID(), + deserializedEvent.getDAGID()); Assert.assertEquals(event.getStartTime(), deserializedEvent.getStartTime()); logEvents(event, deserializedEvent); } @@ -239,8 +243,8 @@ private void testDAGFinishedEvent() throws Exception { DAGFinishedEvent deserializedEvent = (DAGFinishedEvent) testProtoConversion(event); Assert.assertEquals( - event.getDagID(), - deserializedEvent.getDagID()); + event.getDAGID(), + deserializedEvent.getDAGID()); Assert.assertEquals(event.getState(), deserializedEvent.getState()); Assert.assertNotEquals(event.getStartTime(), deserializedEvent.getStartTime()); Assert.assertEquals(event.getFinishTime(), deserializedEvent.getFinishTime()); @@ -260,8 +264,8 @@ private void testDAGFinishedEvent() throws Exception { DAGFinishedEvent deserializedEvent = (DAGFinishedEvent) testProtoConversion(event); Assert.assertEquals( - event.getDagID(), - deserializedEvent.getDagID()); + event.getDAGID(), + deserializedEvent.getDAGID()); Assert.assertEquals(event.getState(), deserializedEvent.getState()); Assert.assertNotEquals(event.getStartTime(), deserializedEvent.getStartTime()); Assert.assertEquals(event.getFinishTime(), deserializedEvent.getFinishTime()); diff --git a/tez-dag/src/test/java/org/apache/tez/dag/history/recovery/TestRecoveryService.java 
b/tez-dag/src/test/java/org/apache/tez/dag/history/recovery/TestRecoveryService.java index 790e2d8059..5d1fff9121 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/history/recovery/TestRecoveryService.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/history/recovery/TestRecoveryService.java @@ -20,8 +20,8 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; diff --git a/tez-dag/src/test/java/org/apache/tez/dag/history/utils/TestDAGUtils.java b/tez-dag/src/test/java/org/apache/tez/dag/history/utils/TestDAGUtils.java index 4d4577ac77..f70224dc79 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/history/utils/TestDAGUtils.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/history/utils/TestDAGUtils.java @@ -56,7 +56,7 @@ public class TestDAGUtils { @SuppressWarnings("deprecation") - private DAGPlan createDAG() { + private DAGPlan createDAG(String dagName) { // Create a plan with 3 vertices: A, B, C. Group(A,B)->C Configuration conf = new Configuration(false); int dummyTaskCount = 1; @@ -73,7 +73,7 @@ private DAGPlan createDAG() { ProcessorDescriptor.create("Processor").setHistoryText("vertex3 Processor HistoryText"), dummyTaskCount, dummyTaskResource); - DAG dag = DAG.create("testDag"); + DAG dag = DAG.create("DAG-" + dagName); dag.setCallerContext(CallerContext.create("context1", "callerId1", "callerType1", "desc1")); dag.setDAGInfo("dagInfo"); String groupName1 = "uv12"; @@ -102,7 +102,7 @@ private DAGPlan createDAG() { @Test(timeout = 5000) @SuppressWarnings("unchecked") public void testConvertDAGPlanToATSMap() throws IOException, JSONException { - DAGPlan dagPlan = createDAG(); + DAGPlan dagPlan = createDAG("testConvertDAGPlanToATSMap"); Map idNameMap = new HashMap(); ApplicationId appId = ApplicationId.newInstance(1, 1); TezDAGID dagId = TezDAGID.getInstance(appId, 1); @@ -115,7 +115,8 @@ public void testConvertDAGPlanToATSMap() throws IOException, JSONException { Map atsMap = DAGUtils.convertDAGPlanToATSMap(dagPlan); Assert.assertTrue(atsMap.containsKey(DAGUtils.DAG_NAME_KEY)); - Assert.assertEquals("testDag", atsMap.get(DAGUtils.DAG_NAME_KEY)); + Assert.assertEquals("DAG-testConvertDAGPlanToATSMap", + atsMap.get(DAGUtils.DAG_NAME_KEY)); Assert.assertTrue(atsMap.containsKey(DAGUtils.DAG_INFO_KEY)); Assert.assertTrue(atsMap.containsKey(DAGUtils.DAG_CONTEXT_KEY)); Map contextMap = (Map)atsMap.get(DAGUtils.DAG_CONTEXT_KEY); diff --git a/tez-dag/src/test/java/org/apache/tez/test/ControlledScheduledExecutorService.java b/tez-dag/src/test/java/org/apache/tez/test/ControlledScheduledExecutorService.java new file mode 100644 index 0000000000..f6da15b81a --- /dev/null +++ b/tez-dag/src/test/java/org/apache/tez/test/ControlledScheduledExecutorService.java @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.test; + +import org.apache.tez.dag.app.MockClock; +import org.apache.tez.dag.app.MockClock.MockClockListener; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.PriorityQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.Delayed; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.FutureTask; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.RunnableScheduledFuture; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +/** A scheduled executor service with timing that can be controlled for unit tests. */ +public class ControlledScheduledExecutorService implements ScheduledExecutorService, MockClockListener { + private final MockClock clock; + private final PriorityQueue> queue = new PriorityQueue<>(); + private final AtomicLong nextSequenceNum = new AtomicLong(0); + private final AtomicBoolean stopped = new AtomicBoolean(false); + + public ControlledScheduledExecutorService(MockClock clock) { + this.clock = clock; + clock.register(this); + } + + @Override + public ScheduledFuture schedule(Runnable command, long delay, TimeUnit unit) { + ScheduledFutureTask task = new ScheduledFutureTask<>(command, null, toTimestamp(delay, unit)); + schedule(task); + return task; + } + + @Override + public ScheduledFuture schedule(Callable callable, long delay, TimeUnit unit) { + ScheduledFutureTask task = new ScheduledFutureTask<>(callable, toTimestamp(delay, unit)); + schedule(task); + return task; + } + + @Override + public ScheduledFuture scheduleWithFixedDelay(Runnable command, long initialDelay, long delay, TimeUnit unit) { + ScheduledFutureTask task = new ScheduledFutureTask<>(command, null, + toTimestamp(initialDelay, unit), unit.toMillis(delay)); + schedule(task); + return task; + } + + @Override + public ScheduledFuture scheduleAtFixedRate(Runnable command, long initialDelay, long period, TimeUnit unit) { + return scheduleWithFixedDelay(command, initialDelay, period, unit); + } + + @Override + public Future submit(Callable callable) { + ScheduledFutureTask task = new ScheduledFutureTask<>(callable, 0); + schedule(task); + return task; + } + + @Override + public Future submit(Runnable runnable, T result) { + ScheduledFutureTask task = new ScheduledFutureTask<>(runnable, result, 0); + schedule(task); + return task; + } + + @Override + public Future submit(Runnable runnable) { + ScheduledFutureTask task = new ScheduledFutureTask<>(runnable, null, 0); + schedule(task); + return task; + } + + @Override + public List> invokeAll(Collection> tasks) { + throw new UnsupportedOperationException("invokeAll not yet implemented"); + } + + @Override + public List> invokeAll(Collection> tasks, long timeout, TimeUnit unit) throws InterruptedException { + throw new 
UnsupportedOperationException("invokeAll not yet implemented"); + } + + @Override + public T invokeAny(Collection> tasks) throws InterruptedException, ExecutionException { + throw new UnsupportedOperationException("invokeAny not yet implemented"); + } + + @Override + public T invokeAny(Collection> tasks, long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { + throw new UnsupportedOperationException("invokeAny not yet implemented"); + } + + @Override + public void execute(Runnable command) { + submit(command); + } + + @Override + public void shutdown() { + stopped.set(true); + } + + @Override + public List shutdownNow() { + stopped.set(true); + return new ArrayList(queue); + } + + @Override + public boolean isShutdown() { + return stopped.get(); + } + + @Override + public boolean isTerminated() { + return false; + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedException { + return false; + } + + @Override + public void onTimeUpdated(long newTime) { + ScheduledFutureTask task = queue.peek(); + while (task != null && task.timestamp <= newTime) { + task = queue.poll(); + runTask(task); + task = queue.peek(); + } + } + + private long now() { + return clock.getTime(); + } + + private long toTimestamp(long delay, TimeUnit unit) { + return now() + unit.toMillis(delay); + } + + private void schedule(ScheduledFutureTask task) { + if (isShutdown()) { + throw new RejectedExecutionException("Executor has been shutdown"); + } + if (now() - task.timestamp >= 0) { + runTask(task); + } else { + queue.add(task); + } + } + + private void runTask(ScheduledFutureTask task) { + task.run(); + if (task.isPeriodic() && !isShutdown()) { + task.timestamp = toTimestamp(task.period, TimeUnit.MILLISECONDS); + queue.add(task); + } + } + + private class ScheduledFutureTask extends FutureTask implements RunnableScheduledFuture { + private final long sequenceNum; + private final long period; + private long timestamp; + + public ScheduledFutureTask(Callable callable, long timestamp) { + super(callable); + this.sequenceNum = nextSequenceNum.getAndIncrement(); + this.timestamp = timestamp; + this.period = 0; + } + + public ScheduledFutureTask(Runnable runnable, V result, long timestamp) { + super(runnable, result); + this.sequenceNum = nextSequenceNum.getAndIncrement(); + this.timestamp = timestamp; + this.period = 0; + } + + public ScheduledFutureTask(Runnable runnable, V result, long timestamp, long period) { + super(runnable, result); + this.sequenceNum = nextSequenceNum.getAndIncrement(); + this.timestamp = timestamp; + this.period = period; + } + + @Override + public boolean isPeriodic() { + return period != 0; + } + + @Override + public long getDelay(TimeUnit unit) { + return unit.convert(timestamp - now(), TimeUnit.MILLISECONDS); + } + + @Override + public int compareTo(Delayed o) { + if (o == this) { + return 0; + } + int result = Long.compare(getDelay(TimeUnit.MILLISECONDS), o.getDelay(TimeUnit.MILLISECONDS)); + if (result == 0 && o instanceof ScheduledFutureTask) { + ScheduledFutureTask otherTask = (ScheduledFutureTask) o; + result = Long.compare(sequenceNum, otherTask.sequenceNum); + } + return result; + } + } +} diff --git a/tez-dag/src/test/java/org/apache/tez/test/GraceShuffleVertexManagerForTest.java b/tez-dag/src/test/java/org/apache/tez/test/GraceShuffleVertexManagerForTest.java index 40a6bd3ad1..ff89ef83eb 100644 --- a/tez-dag/src/test/java/org/apache/tez/test/GraceShuffleVertexManagerForTest.java +++ 
b/tez-dag/src/test/java/org/apache/tez/test/GraceShuffleVertexManagerForTest.java @@ -18,7 +18,7 @@ package org.apache.tez.test; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.protobuf.ByteString; import org.apache.hadoop.conf.Configuration; @@ -33,6 +33,7 @@ import java.io.IOException; import java.util.EnumSet; +import java.util.Objects; /** * A shuffle vertex manager that will set the vertex's parallelism upon @@ -149,7 +150,7 @@ public ByteString toByteString() throws IOException { } private GraceConf build() { - Preconditions.checkNotNull(grandparentVertex, + Objects.requireNonNull(grandparentVertex, "Grandparent vertex is required"); Preconditions.checkArgument(desiredParallelism > 0, "Desired parallelism must be greater than 0"); diff --git a/tez-dag/src/test/resources/META-INF/LICENSE.txt b/tez-dag/src/test/resources/META-INF/LICENSE similarity index 100% rename from tez-dag/src/test/resources/META-INF/LICENSE.txt rename to tez-dag/src/test/resources/META-INF/LICENSE diff --git a/tez-dag/src/test/resources/META-INF/NOTICE b/tez-dag/src/test/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-dag/src/test/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-dag/src/test/resources/META-INF/NOTICE.txt b/tez-dag/src/test/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-dag/src/test/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-dist/dist-files/full/LICENSE b/tez-dist/dist-files/full/LICENSE index 2647d1399b..7b4c36c903 100644 --- a/tez-dist/dist-files/full/LICENSE +++ b/tez-dist/dist-files/full/LICENSE @@ -306,7 +306,7 @@ The Apache TEZ binary distribution bundles the following files under the CDDL Li - jersey-json-*.jar - jaxb-api-*.jar - jaxb-impl-*.jar - - servlet-api-*.jar + - javax.servlet-api-*.jar The text for this license can be found in the LICENSE-CDDLv1.1-GPLv2_withCPE file. diff --git a/tez-dist/dist-files/full/NOTICE b/tez-dist/dist-files/full/NOTICE index 99b8d15791..d6d7f11581 100644 --- a/tez-dist/dist-files/full/NOTICE +++ b/tez-dist/dist-files/full/NOTICE @@ -1,5 +1,5 @@ Apache Tez -Copyright (c) 2015 The Apache Software Foundation +Copyright 2014-2024 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/tez-dist/dist-files/minimal/LICENSE b/tez-dist/dist-files/minimal/LICENSE index c89bc24176..7083a9f8c9 100644 --- a/tez-dist/dist-files/minimal/LICENSE +++ b/tez-dist/dist-files/minimal/LICENSE @@ -234,7 +234,6 @@ license: - hadoop-shim-*.jar - async-http-client-*.jar - commons-cli-*.jar - - commons-collections-*.jar - commons-io-*.jar - commons-math3-*.jar - commons-codec-*.jar @@ -261,7 +260,7 @@ The Apache TEZ binary distribution bundles the following files under the CDDL Li - jersey-client-*.jar - jersey-json-*.jar - - servlet-api-*.jar + - javax.servlet-api-*.jar The full text of the license is available in LICENSE-CDDLv1.1-GPLv2_withCPE. 
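A note on the ControlledScheduledExecutorService added above: it never spawns threads. Tasks only run when the mock clock notifies it through onTimeUpdated(newTime), at which point every queued task whose timestamp has been reached executes inline on the caller's thread. The sketch below illustrates that clock/listener contract with self-contained stand-ins; FakeClock and TimeListener are hypothetical names for this illustration, not Tez classes, and MockClock's real API may differ.

  import java.util.ArrayList;
  import java.util.List;

  // Illustrative stand-in for the clock/listener pattern used by the test executor.
  interface TimeListener {
    void onTimeUpdated(long newTime);
  }

  class FakeClock {
    private long time;
    private final List<TimeListener> listeners = new ArrayList<>();

    void register(TimeListener listener) {
      listeners.add(listener);
    }

    long getTime() {
      return time;
    }

    // Moving the fake time forward synchronously notifies listeners; a
    // controlled executor reacts by running every queued task whose
    // timestamp has been reached, all on the caller's thread.
    void advance(long millis) {
      time += millis;
      for (TimeListener listener : listeners) {
        listener.onTimeUpdated(time);
      }
    }
  }

  class FakeClockDemo {
    public static void main(String[] args) {
      FakeClock clock = new FakeClock();
      long[] firedAt = { -1 };
      // Stands in for a task scheduled with a 500ms delay.
      clock.register(newTime -> {
        if (firedAt[0] < 0 && newTime >= 500) {
          firedAt[0] = newTime;
        }
      });
      clock.advance(400);  // not due yet; nothing runs
      clock.advance(200);  // due now; the "task" runs inline at t=600
      System.out.println("fired at t=" + firedAt[0]);  // prints: fired at t=600
    }
  }

This is why the executor can be driven deterministically from a unit test: no sleeps or real timers are involved, only explicit advances of the mock time.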
diff --git a/tez-dist/dist-files/minimal/NOTICE b/tez-dist/dist-files/minimal/NOTICE index 9c69dff5f6..c036d1c1ff 100644 --- a/tez-dist/dist-files/minimal/NOTICE +++ b/tez-dist/dist-files/minimal/NOTICE @@ -1,5 +1,5 @@ Apache Tez -Copyright (c) 2015 The Apache Software Foundation +Copyright 2014-2024 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/tez-dist/pom.xml b/tez-dist/pom.xml index 2187bfc3a0..6e0721f371 100644 --- a/tez-dist/pom.xml +++ b/tez-dist/pom.xml @@ -21,7 +21,7 @@ org.apache.tez tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-dist @@ -49,6 +49,12 @@ ${project.version} test-jar + + org.apache.tez + tez-job-analyzer + ${project.version} + provided + @@ -59,7 +65,7 @@ hadoop27 - true + false @@ -67,6 +73,11 @@ tez-yarn-timeline-history-with-acls ${project.version} + + org.apache.tez + tez-protobuf-history-plugin + ${project.version} + org.apache.tez hadoop-shim-2.7 @@ -77,7 +88,7 @@ hadoop28 - false + true @@ -95,6 +106,11 @@ tez-yarn-timeline-cache-plugin ${project.version} + + org.apache.tez + tez-protobuf-history-plugin + ${project.version} + org.apache.tez hadoop-shim-2.8 diff --git a/tez-dist/src/main/assembly/tez-dist-minimal.xml b/tez-dist/src/main/assembly/tez-dist-minimal.xml index 80633ffd57..4c95d37efa 100644 --- a/tez-dist/src/main/assembly/tez-dist-minimal.xml +++ b/tez-dist/src/main/assembly/tez-dist-minimal.xml @@ -24,6 +24,9 @@ true org.apache.tez:tez-aux-services + org.apache.tez:tez-ext-service-tests + org.apache.tez:tez-ui + org.apache.tez:tez-docs / diff --git a/tez-dist/src/main/assembly/tez-dist.xml b/tez-dist/src/main/assembly/tez-dist.xml index b8834a88a7..665121aee1 100644 --- a/tez-dist/src/main/assembly/tez-dist.xml +++ b/tez-dist/src/main/assembly/tez-dist.xml @@ -24,6 +24,7 @@ true org.apache.tez:tez-aux-services + org.apache.tez:tez-ui / @@ -36,6 +37,7 @@ *:*:test-jar org.apache.tez:* + jline:jline diff --git a/tez-dist/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-dist/src/main/javadoc/resources/META-INF/LICENSE similarity index 100% rename from tez-dist/src/main/javadoc/resources/META-INF/LICENSE.txt rename to tez-dist/src/main/javadoc/resources/META-INF/LICENSE diff --git a/tez-dist/src/main/javadoc/resources/META-INF/NOTICE b/tez-dist/src/main/javadoc/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-dist/src/main/javadoc/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-dist/src/main/javadoc/resources/META-INF/NOTICE.txt b/tez-dist/src/main/javadoc/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-dist/src/main/javadoc/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
- diff --git a/tez-examples/pom.xml b/tez-examples/pom.xml index b20e973785..3ab7ae5a0c 100644 --- a/tez-examples/pom.xml +++ b/tez-examples/pom.xml @@ -20,7 +20,7 @@ org.apache.tez tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-examples diff --git a/tez-examples/src/main/java/org/apache/tez/examples/CartesianProduct.java b/tez-examples/src/main/java/org/apache/tez/examples/CartesianProduct.java index 84367f8dfa..a925137f97 100644 --- a/tez-examples/src/main/java/org/apache/tez/examples/CartesianProduct.java +++ b/tez-examples/src/main/java/org/apache/tez/examples/CartesianProduct.java @@ -17,7 +17,7 @@ */ package org.apache.tez.examples; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; diff --git a/tez-examples/src/main/java/org/apache/tez/examples/ExampleDriver.java b/tez-examples/src/main/java/org/apache/tez/examples/ExampleDriver.java index 074f77fa19..c143e21d77 100644 --- a/tez-examples/src/main/java/org/apache/tez/examples/ExampleDriver.java +++ b/tez-examples/src/main/java/org/apache/tez/examples/ExampleDriver.java @@ -18,29 +18,17 @@ package org.apache.tez.examples; -import java.io.IOException; -import java.text.DecimalFormat; -import java.util.EnumSet; -import java.util.Set; - import org.apache.hadoop.util.ProgramDriver; -import org.apache.tez.common.counters.TezCounters; -import org.apache.tez.dag.api.TezException; -import org.apache.tez.dag.api.client.DAGClient; -import org.apache.tez.dag.api.client.DAGStatus; -import org.apache.tez.dag.api.client.Progress; -import org.apache.tez.dag.api.client.StatusGetOpts; -import org.apache.tez.dag.api.client.VertexStatus; /** * A description of an example program based on its class and a * human-readable description. */ -public class ExampleDriver { +public final class ExampleDriver { - private static final DecimalFormat formatter = new DecimalFormat("###.##%"); + private ExampleDriver() {} - public static void main(String argv[]){ + public static void main(String[] argv){ int exitCode = -1; ProgramDriver pgd = new ProgramDriver(); try { diff --git a/tez-examples/src/main/java/org/apache/tez/examples/HashJoinExample.java b/tez-examples/src/main/java/org/apache/tez/examples/HashJoinExample.java index 935ccbca10..361d7a9814 100644 --- a/tez-examples/src/main/java/org/apache/tez/examples/HashJoinExample.java +++ b/tez-examples/src/main/java/org/apache/tez/examples/HashJoinExample.java @@ -53,7 +53,7 @@ import org.apache.tez.runtime.library.partitioner.HashPartitioner; import org.apache.tez.runtime.library.processor.SimpleProcessor; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; /** * Simple example of joining 2 data sets using
- * <li>Because of the sort implemention
+ * they are both sorted already. <li>Because of the sort implementation
 * difference we describe above, the data requirement is also different for * these 2 sort algorithms. For {@link HashJoinExample} It is required that keys * in the hashFile are unique. while for {@link SortMergeJoinExample} it is @@ -105,7 +105,8 @@ protected int runJob(String[] args, TezConfiguration tezConf, Path outputPath = new Path(outputDir); // Verify output path existence - FileSystem fs = FileSystem.get(tezConf); + FileSystem fs = outputPath.getFileSystem(tezConf); + outputPath = fs.makeQualified(outputPath); if (fs.exists(outputPath)) { System.err.println("Output directory: " + outputDir + " already exists"); return 3; @@ -132,7 +133,7 @@ protected int validateArgs(String[] otherArgs) { * v1 v2
    *  \ /
    *   v3
    - * + * * @param tezConf * @param inputPath1 * @param inputPath2 @@ -268,7 +269,7 @@ public void run() throws Exception { /** * Join 2 sorted inputs both from {@link KeyValuesReader} and write output * using {@link KeyValueWriter} - * + * * @param inputReader1 * @param inputReader2 * @param writer diff --git a/tez-examples/src/main/java/org/apache/tez/examples/TezExampleBase.java b/tez-examples/src/main/java/org/apache/tez/examples/TezExampleBase.java index a3c0224abf..cb521055e4 100644 --- a/tez-examples/src/main/java/org/apache/tez/examples/TezExampleBase.java +++ b/tez-examples/src/main/java/org/apache/tez/examples/TezExampleBase.java @@ -63,11 +63,16 @@ public abstract class TezExampleBase extends Configured implements Tool { protected static final String LOCAL_MODE = "local"; protected static final String COUNTER_LOG = "counter"; protected static final String GENERATE_SPLIT_IN_CLIENT = "generateSplitInClient"; + protected static final String LEAVE_AM_RUNNING = "leaveAmRunning"; + protected static final String RECONNECT_APP_ID = "reconnectAppId"; + private boolean disableSplitGrouping = false; private boolean isLocalMode = false; private boolean isCountersLog = false; private boolean generateSplitInClient = false; + private boolean leaveAmRunning = false; + private String reconnectAppId; private HadoopShim hadoopShim; protected boolean isCountersLog() { @@ -88,6 +93,8 @@ private Options getExtraOptions() { options.addOption(DISABLE_SPLIT_GROUPING, false , "disable split grouping"); options.addOption(COUNTER_LOG, false , "print counter log"); options.addOption(GENERATE_SPLIT_IN_CLIENT, false, "whether generate split in client"); + options.addOption(LEAVE_AM_RUNNING, false, "whether client should stop session"); + options.addOption(RECONNECT_APP_ID, true, "appId for client reconnect"); return options; } @@ -108,6 +115,12 @@ public final int run(String[] args) throws Exception { if (optionParser.getCommandLine().hasOption(GENERATE_SPLIT_IN_CLIENT)) { generateSplitInClient = true; } + if (optionParser.getCommandLine().hasOption(LEAVE_AM_RUNNING)) { + leaveAmRunning = true; + } + if (optionParser.getCommandLine().hasOption(RECONNECT_APP_ID)) { + reconnectAppId = optionParser.getCommandLine().getOptionValue(RECONNECT_APP_ID); + } hadoopShim = new HadoopShimsLoader(conf).getHadoopShim(); return _execute(otherArgs, null, null); @@ -231,15 +244,20 @@ private int _execute(String[] otherArgs, TezConfiguration tezConf, TezClient tez try { return runJob(otherArgs, tezConf, tezClientInternal); } finally { - if (ownTezClient && tezClientInternal != null) { + if (ownTezClient && tezClientInternal != null && !leaveAmRunning) { tezClientInternal.stop(); } } } private TezClient createTezClient(TezConfiguration tezConf) throws IOException, TezException { - TezClient tezClient = TezClient.create(getClass().getSimpleName(), tezConf); - tezClient.start(); + TezClient tezClient = TezClient.create("TezExampleApplication", tezConf); + if(reconnectAppId != null) { + ApplicationId appId = TezClient.appIdfromString(reconnectAppId); + tezClient.getClient(appId); + } else { + tezClient.start(); + } return tezClient; } @@ -258,13 +276,14 @@ private void _printUsage() { protected void printExtraOptionsUsage(PrintStream ps) { ps.println("Tez example extra options supported are"); - // TODO TEZ-1348 make it able to access dfs in tez local mode - ps.println("-" + LOCAL_MODE + "\t\trun it in tez local mode, currently it can only access local file system in tez local mode," + ps.println("-" + LOCAL_MODE + "\t\trun it 
diff --git a/tez-examples/src/main/java/org/apache/tez/examples/WordCount.java b/tez-examples/src/main/java/org/apache/tez/examples/WordCount.java
index 6149193212..c58c3bad75 100644
--- a/tez-examples/src/main/java/org/apache/tez/examples/WordCount.java
+++ b/tez-examples/src/main/java/org/apache/tez/examples/WordCount.java
@@ -47,7 +47,7 @@
 import org.apache.tez.runtime.library.partitioner.HashPartitioner;
 import org.apache.tez.runtime.library.processor.SimpleProcessor;
 
-import com.google.common.base.Preconditions;
+import org.apache.tez.common.Preconditions;
 
 /**
  * Simple example to perform WordCount using Tez API's. WordCount is the
diff --git a/tez-examples/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-examples/src/main/javadoc/resources/META-INF/LICENSE
similarity index 100%
rename from tez-examples/src/main/javadoc/resources/META-INF/LICENSE.txt
rename to tez-examples/src/main/javadoc/resources/META-INF/LICENSE
diff --git a/tez-examples/src/main/javadoc/resources/META-INF/NOTICE b/tez-examples/src/main/javadoc/resources/META-INF/NOTICE
new file mode 100644
index 0000000000..2595905699
--- /dev/null
+++ b/tez-examples/src/main/javadoc/resources/META-INF/NOTICE
@@ -0,0 +1,6 @@
+Apache Tez
+Copyright 2014-2024 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
diff --git a/tez-examples/src/main/javadoc/resources/META-INF/NOTICE.txt b/tez-examples/src/main/javadoc/resources/META-INF/NOTICE.txt
deleted file mode 100644
index 3f36fcc6ba..0000000000
--- a/tez-examples/src/main/javadoc/resources/META-INF/NOTICE.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Apache Tez
-Copyright (c) 2016 The Apache Software Foundation
-
-This product includes software developed at
-The Apache Software Foundation (http://www.apache.org/).
-
diff --git a/tez-examples/src/main/resources/META-INF/LICENSE.txt b/tez-examples/src/main/resources/META-INF/LICENSE
similarity index 100%
rename from tez-examples/src/main/resources/META-INF/LICENSE.txt
rename to tez-examples/src/main/resources/META-INF/LICENSE
diff --git a/tez-examples/src/main/resources/META-INF/NOTICE b/tez-examples/src/main/resources/META-INF/NOTICE
new file mode 100644
index 0000000000..2595905699
--- /dev/null
+++ b/tez-examples/src/main/resources/META-INF/NOTICE
@@ -0,0 +1,6 @@
+Apache Tez
+Copyright 2014-2024 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
diff --git a/tez-examples/src/main/resources/META-INF/NOTICE.txt b/tez-examples/src/main/resources/META-INF/NOTICE.txt
deleted file mode 100644
index 3f36fcc6ba..0000000000
--- a/tez-examples/src/main/resources/META-INF/NOTICE.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Apache Tez
-Copyright (c) 2016 The Apache Software Foundation
-
-This product includes software developed at
-The Apache Software Foundation (http://www.apache.org/).
-
diff --git a/tez-examples/src/test/resources/META-INF/LICENSE.txt b/tez-examples/src/test/resources/META-INF/LICENSE
similarity index 100%
rename from tez-examples/src/test/resources/META-INF/LICENSE.txt
rename to tez-examples/src/test/resources/META-INF/LICENSE
diff --git a/tez-examples/src/test/resources/META-INF/NOTICE b/tez-examples/src/test/resources/META-INF/NOTICE
new file mode 100644
index 0000000000..2595905699
--- /dev/null
+++ b/tez-examples/src/test/resources/META-INF/NOTICE
@@ -0,0 +1,6 @@
+Apache Tez
+Copyright 2014-2024 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
diff --git a/tez-examples/src/test/resources/META-INF/NOTICE.txt b/tez-examples/src/test/resources/META-INF/NOTICE.txt
deleted file mode 100644
index 3f36fcc6ba..0000000000
--- a/tez-examples/src/test/resources/META-INF/NOTICE.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-Apache Tez
-Copyright (c) 2016 The Apache Software Foundation
-
-This product includes software developed at
-The Apache Software Foundation (http://www.apache.org/).
-
diff --git a/tez-ext-service-tests/pom.xml b/tez-ext-service-tests/pom.xml
index d9d47c9726..532588a9ce 100644
--- a/tez-ext-service-tests/pom.xml
+++ b/tez-ext-service-tests/pom.xml
@@ -20,20 +20,23 @@
   <parent>
     <artifactId>tez</artifactId>
     <groupId>org.apache.tez</groupId>
-    <version>0.9.1-SNAPSHOT</version>
+    <version>0.10.5-SNAPSHOT</version>
   </parent>
   <artifactId>tez-ext-service-tests</artifactId>
 
+  <properties>
+    <test.log.dir>${project.build.directory}/logs</test.log.dir>
+  </properties>
+
   <dependencies>
     <dependency>
      <groupId>io.netty</groupId>
-      <artifactId>netty</artifactId>
-      <version>3.6.2.Final</version>
+      <artifactId>netty-all</artifactId>
     </dependency>
     <dependency>
       <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-log4j12</artifactId>
+      <artifactId>slf4j-reload4j</artifactId>
     </dependency>
     <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
@@ -42,6 +45,12 @@
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.avro</groupId>
+          <artifactId>avro</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
@@ -54,9 +63,13 @@
     <dependency>
       <groupId>org.mockito</groupId>
-      <artifactId>mockito-all</artifactId>
+      <artifactId>mockito-core</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.tez</groupId>
+      <artifactId>hadoop-shim</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.tez</groupId>
       <artifactId>tez-runtime-internals</artifactId>
@@ -96,8 +109,7 @@
     <dependency>
       <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
-      <type>test-jar</type>
+      <artifactId>hadoop-mapreduce-client-shuffle</artifactId>
       <scope>test</scope>
     </dependency>
@@ -127,34 +139,61 @@
+      <plugin>
+        <artifactId>maven-antrun-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>generate-sources</id>
+            <phase>generate-sources</phase>
+            <configuration>
+              <target>
+                <mkdir dir="${test.log.dir}"/>
+              </target>
+            </configuration>
+            <goals>
+              <goal>run</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <systemPropertyVariables>
+            <test.log.dir>${test.log.dir}</test.log.dir>
+          </systemPropertyVariables>
+        </configuration>
+      </plugin>
       <plugin>
         <groupId>org.apache.rat</groupId>
         <artifactId>apache-rat-plugin</artifactId>
       </plugin>
       <plugin>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-maven-plugins</artifactId>
+        <groupId>com.github.os72</groupId>
+        <artifactId>protoc-jar-maven-plugin</artifactId>
         <executions>
           <execution>
-            <id>compile-protoc</id>
             <phase>generate-sources</phase>
             <goals>
-              <goal>protoc</goal>
+              <goal>run</goal>
            </goals>
            <configuration>
-              <protocVersion>${protobuf.version}</protocVersion>
+              <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
               <protocCommand>${protoc.path}</protocCommand>
-              <imports>
-                <param>${basedir}/src/test/proto</param>
-              </imports>
-              <source>
-                <directory>${basedir}/src/test/proto</directory>
-                <includes>
-                  <include>TezDaemonProtocol.proto</include>
-                </includes>
-              </source>
-              <output>${project.build.directory}/generated-test-sources/java</output>
+              <addSources>none</addSources>
+              <includeDirectories>
+                <include>${basedir}/../tez-api/src/main/proto</include>
+              </includeDirectories>
+              <inputDirectories>
+                <include>${basedir}/src/test/proto</include>
+              </inputDirectories>
+              <outputTargets>
+                <outputTarget>
+                  <type>java</type>
+                  <outputDirectory>${project.build.directory}/generated-test-sources/java</outputDirectory>
+                </outputTarget>
+              </outputTargets>
             </configuration>
           </execution>
         </executions>
       </plugin>
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java
index ac50878cf5..713a3d3fcd 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/TezTestServiceCommunicator.java
@@ -29,6 +29,7 @@
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import com.google.protobuf.Message;
 import org.apache.hadoop.service.AbstractService;
+import org.apache.tez.common.GuavaShim;
 import org.apache.tez.service.TezTestServiceProtocolBlockingPB;
 import org.apache.tez.service.impl.TezTestServiceProtocolClientImpl;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.RunContainerRequestProto;
@@ -69,7 +70,7 @@ public void onSuccess(RunContainerResponseProto result) {
       public void onFailure(Throwable t) {
         callback.indicateError(t);
       }
-    });
+    }, GuavaShim.directExecutor());
   }
 
@@ -86,7 +87,7 @@ public void onSuccess(SubmitWorkResponseProto result) {
       public void onFailure(Throwable t) {
         callback.indicateError(t);
       }
-    });
+    }, GuavaShim.directExecutor());
   }
 
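[Editor's note] The hunks above add an explicit callback executor: newer Guava releases dropped the two-argument Futures.addCallback(future, callback) overload, so an Executor must now be passed. The GuavaShim indirection presumably just wraps MoreExecutors.directExecutor(); its implementation is not shown in this patch. A self-contained sketch of the three-argument pattern using plain Guava:

    import com.google.common.util.concurrent.FutureCallback;
    import com.google.common.util.concurrent.Futures;
    import com.google.common.util.concurrent.ListenableFuture;
    import com.google.common.util.concurrent.ListeningExecutorService;
    import com.google.common.util.concurrent.MoreExecutors;
    import java.util.concurrent.Executors;

    public class CallbackSketch {
      public static void main(String[] args) {
        ListeningExecutorService pool =
            MoreExecutors.listeningDecorator(Executors.newSingleThreadExecutor());
        ListenableFuture<String> future = pool.submit(() -> "ok");
        // Old Guava defaulted to a direct executor; new Guava makes it explicit.
        Futures.addCallback(future, new FutureCallback<String>() {
          @Override public void onSuccess(String result) { System.out.println(result); }
          @Override public void onFailure(Throwable t) { t.printStackTrace(); }
        }, MoreExecutors.directExecutor());
        pool.shutdown();
      }
    }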
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
index 845a27b091..110d918edf 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/launcher/TezTestServiceContainerLauncher.java
@@ -17,7 +17,7 @@
 import java.io.IOException;
 import java.net.InetSocketAddress;
 
-import com.google.common.base.Preconditions;
+import org.apache.tez.common.Preconditions;
 import com.google.protobuf.ByteString;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.StringUtils;
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
index 8b91ddecd9..eafedef11c 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerService.java
@@ -23,7 +23,7 @@
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import com.google.common.base.Preconditions;
+import org.apache.tez.common.Preconditions;
 import com.google.common.primitives.Ints;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -251,4 +251,9 @@ public Container createContainer(Resource capability, Priority priority, String
       return container;
     }
   }
+
+  @Override
+  public int getHeldContainersCount() {
+    return 0;
+  }
 }
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerServiceWithErrors.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerServiceWithErrors.java
index 13d4815f02..60cc1f22a9 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerServiceWithErrors.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/rm/TezTestServiceTaskSchedulerServiceWithErrors.java
@@ -99,4 +99,9 @@ public boolean hasUnregistered() {
   @Override
   public void dagComplete() {
   }
+
+  @Override
+  public int getHeldContainersCount() {
+    return 0;
+  }
 }
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
index 732c81af67..377217b0cf 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/dag/app/taskcomm/TezTestServiceTaskCommunicatorImpl.java
@@ -61,8 +61,7 @@ public TezTestServiceTaskCommunicatorImpl(
 
     SubmitWorkRequestProto.Builder baseBuilder = SubmitWorkRequestProto.newBuilder();
 
-    // TODO Avoid reading this from the environment
-    baseBuilder.setUser(System.getenv(ApplicationConstants.Environment.USER.name()));
+    baseBuilder.setUser(System.getProperty("user.name"));
     baseBuilder.setApplicationIdString(
         taskCommunicatorContext.getApplicationAttemptId().getApplicationId().toString());
     baseBuilder
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
index c5ff02dd6a..3a6935f52b 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/MiniTezTestServiceCluster.java
@@ -18,7 +18,7 @@
 import java.io.IOException;
 import java.net.InetSocketAddress;
 
-import com.google.common.base.Preconditions;
+import org.apache.tez.common.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java
index bf4a5bdae1..e088ef791e 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/TezTestServiceConfConstants.java
@@ -14,7 +14,7 @@
 
 package org.apache.tez.service;
 
-public class TezTestServiceConfConstants {
+public final class TezTestServiceConfConstants {
 
   private static final String TEZ_TEST_SERVICE_PREFIX = "tez.test.service.";
 
@@ -38,4 +38,5 @@ public class TezTestServiceConfConstants {
   public static final String TEZ_TEST_SERVICE_AM_COMMUNICATOR_NUM_THREADS = TEZ_TEST_SERVICE_PREFIX + "communicator.num.threads";
   public static final int TEZ_TEST_SERVICE_AM_COMMUNICATOR_NUM_THREADS_DEFAULT = 2;
 
+  private TezTestServiceConfConstants() {}
 }
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
index 5edfd7f652..7864e1c852 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/ContainerRunnerImpl.java
@@ -30,7 +30,8 @@
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
 
-import com.google.common.base.Preconditions;
+import org.apache.tez.common.GuavaShim;
+import org.apache.tez.common.Preconditions;
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Multimap;
 import com.google.common.util.concurrent.FutureCallback;
@@ -210,7 +211,7 @@ public void queueContainer(RunContainerRequestProto request) throws TezException
         workingDir, credentials, memoryPerExecutor);
     ListenableFuture future = executorService
         .submit(callable);
-    Futures.addCallback(future, new ContainerRunnerCallback(request, callable));
+    Futures.addCallback(future, new ContainerRunnerCallback(request, callable), GuavaShim.directExecutor());
   }
 
   /**
@@ -265,11 +266,12 @@ public void submitWork(SubmitWorkRequestProto request) throws TezException {
     // TODO Unregistering does not happen at the moment, since there's no signals on when an app completes.
     LOG.info("Registering request with the ShuffleHandler for containerId {}", request.getContainerIdString());
     ShuffleHandler.get().registerApplication(request.getApplicationIdString(), jobToken, request.getUser());
+    TezCommonUtils.logCredentials(LOG, credentials, "taskCallable");
     TaskRunnerCallable callable = new TaskRunnerCallable(request, new Configuration(getConfig()),
         new ExecutionContextImpl(localAddress.get().getHostName()), env, localDirs,
         workingDir, credentials, memoryPerExecutor, sharedExecutor);
     ListenableFuture future = executorService.submit(callable);
-    Futures.addCallback(future, new TaskRunnerCallback(request, callable));
+    Futures.addCallback(future, new TaskRunnerCallback(request, callable), GuavaShim.directExecutor());
   }
 
@@ -456,6 +458,7 @@ public TezTaskUmbilicalProtocol run() throws Exception {
           new AtomicLong(0),
           request.getContainerIdString());
 
+      TezCommonUtils.logCredentials(LOG, taskUgi.getCredentials(), "taskUgi");
       taskRunner = new TezTaskRunner2(conf, taskUgi, localDirs,
           ProtoConverters.getTaskSpecfromProto(request.getTaskSpec()),
           request.getAppAttemptNumber(),
@@ -553,12 +556,12 @@ private void checkAndThrowExceptionForTests(SubmitWorkRequestProto request) thro
     }
 
     TaskSpec taskSpec = ProtoConverters.getTaskSpecfromProto(request.getTaskSpec());
-    if (taskSpec.getTaskAttemptID().getTaskID().getId() == 0 &&
+    if (taskSpec.getTaskID().getId() == 0 &&
         taskSpec.getTaskAttemptID().getId() == 0) {
       LOG.info("Simulating Rejected work");
       throw new RejectedExecutionException(
           "Simulating Rejected work for taskAttemptId=" + taskSpec.getTaskAttemptID());
-    } else if (taskSpec.getTaskAttemptID().getTaskID().getId() == 1 &&
+    } else if (taskSpec.getTaskID().getId() == 1 &&
         taskSpec.getTaskAttemptID().getId() == 0) {
       LOG.info("Simulating Task Setup Failure during launch");
       throw new TezException("Simulating Task Setup Failure during launch for taskAttemptId=" +
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
index 85e9227ea5..db14991c79 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/service/impl/TezTestService.java
@@ -19,7 +19,7 @@
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 
-import com.google.common.base.Preconditions;
+import org.apache.tez.common.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.util.StringUtils;
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
index e358fcc61e..51224cd8cf 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/IndexCache.java
@@ -19,6 +19,7 @@
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.tez.runtime.library.common.Constants;
 import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
@@ -38,11 +39,21 @@ class IndexCache {
 
   private final LinkedBlockingQueue queue = new LinkedBlockingQueue();
+  private FileSystem fs;
 
   public IndexCache(Configuration conf) {
     this.conf = conf;
     totalMemoryAllowed = 10 * 1024 * 1024;
     LOG.info("IndexCache created with max memory = " + totalMemoryAllowed);
+    initLocalFs();
+  }
+
+  private void initLocalFs() {
+    try {
+      this.fs = FileSystem.getLocal(conf).getRaw();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
   }
 
   /**
@@ -114,7 +125,7 @@ private IndexInformation readIndexFileToCache(Path indexFileName,
       LOG.debug("IndexCache MISS: MapId " + mapId + " not found") ;
     TezSpillRecord tmp = null;
     try {
-      tmp = new TezSpillRecord(indexFileName, conf, expectedIndexOwner);
+      tmp = new TezSpillRecord(indexFileName, fs, expectedIndexOwner);
     } catch (Throwable e) {
       tmp = new TezSpillRecord(0);
       cache.remove(mapId);
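[Editor's note] The IndexCache hunk above resolves the local filesystem once and hands TezSpillRecord a FileSystem instead of a Configuration, so the lookup is not repeated per index read. A small sketch of what that resolution does; getRaw() returns the underlying filesystem without the checksum wrapper, so reads skip CRC verification:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;

    public class LocalFsSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // LocalFileSystem wraps RawLocalFileSystem with checksums; getRaw() unwraps it.
        FileSystem rawLocal = FileSystem.getLocal(conf).getRaw();
        System.out.println(rawLocal.getUri()); // file:///
      }
    }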
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
index ebaf9fe601..43f24ba654 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/shufflehandler/ShuffleHandler.java
@@ -14,17 +14,17 @@
 
 package org.apache.tez.shufflehandler;
 
-import static org.jboss.netty.buffer.ChannelBuffers.wrappedBuffer;
-import static org.jboss.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TYPE;
-import static org.jboss.netty.handler.codec.http.HttpMethod.GET;
-import static org.jboss.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST;
-import static org.jboss.netty.handler.codec.http.HttpResponseStatus.FORBIDDEN;
-import static org.jboss.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR;
-import static org.jboss.netty.handler.codec.http.HttpResponseStatus.METHOD_NOT_ALLOWED;
-import static org.jboss.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND;
-import static org.jboss.netty.handler.codec.http.HttpResponseStatus.OK;
-import static org.jboss.netty.handler.codec.http.HttpResponseStatus.UNAUTHORIZED;
-import static org.jboss.netty.handler.codec.http.HttpVersion.HTTP_1_1;
+import static io.netty.buffer.Unpooled.wrappedBuffer;
+import static io.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TYPE;
+import static io.netty.handler.codec.http.HttpMethod.GET;
+import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST;
+import static io.netty.handler.codec.http.HttpResponseStatus.FORBIDDEN;
+import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR;
+import static io.netty.handler.codec.http.HttpResponseStatus.METHOD_NOT_ALLOWED;
+import static io.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND;
+import static io.netty.handler.codec.http.HttpResponseStatus.OK;
+import static io.netty.handler.codec.http.HttpResponseStatus.UNAUTHORIZED;
+import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1;
 
 import javax.crypto.SecretKey;
 import java.io.File;
@@ -41,15 +41,15 @@
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
-import java.util.concurrent.Executors;
 import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.regex.Pattern;
 
 import com.google.common.base.Charsets;
-import com.google.common.base.Preconditions;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
+
+import org.apache.tez.common.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.LocalDirAllocator;
 import org.apache.hadoop.fs.Path;
@@ -64,36 +64,38 @@
 import org.apache.tez.runtime.library.common.security.SecureShuffleUtils;
 import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader;
 import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord;
-import org.jboss.netty.bootstrap.ServerBootstrap;
-import org.jboss.netty.buffer.ChannelBuffers;
-import org.jboss.netty.channel.Channel;
-import org.jboss.netty.channel.ChannelFactory;
-import org.jboss.netty.channel.ChannelFuture;
-import org.jboss.netty.channel.ChannelFutureListener;
-import org.jboss.netty.channel.ChannelHandlerContext;
-import org.jboss.netty.channel.ChannelPipeline;
-import org.jboss.netty.channel.ChannelPipelineFactory;
-import org.jboss.netty.channel.ChannelStateEvent;
-import org.jboss.netty.channel.Channels;
-import org.jboss.netty.channel.DefaultFileRegion;
-import org.jboss.netty.channel.ExceptionEvent;
-import org.jboss.netty.channel.MessageEvent;
-import org.jboss.netty.channel.SimpleChannelUpstreamHandler;
-import org.jboss.netty.channel.group.ChannelGroup;
-import org.jboss.netty.channel.group.DefaultChannelGroup;
-import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory;
-import org.jboss.netty.handler.codec.frame.TooLongFrameException;
-import org.jboss.netty.handler.codec.http.DefaultHttpResponse;
-import org.jboss.netty.handler.codec.http.HttpChunkAggregator;
-import org.jboss.netty.handler.codec.http.HttpHeaders;
-import org.jboss.netty.handler.codec.http.HttpRequest;
-import org.jboss.netty.handler.codec.http.HttpRequestDecoder;
-import org.jboss.netty.handler.codec.http.HttpResponse;
-import org.jboss.netty.handler.codec.http.HttpResponseEncoder;
-import org.jboss.netty.handler.codec.http.HttpResponseStatus;
-import org.jboss.netty.handler.codec.http.QueryStringDecoder;
-import org.jboss.netty.handler.stream.ChunkedWriteHandler;
-import org.jboss.netty.util.CharsetUtil;
+import io.netty.bootstrap.ServerBootstrap;
+import io.netty.buffer.Unpooled;
+import io.netty.channel.Channel;
+import io.netty.channel.ChannelFuture;
+import io.netty.channel.ChannelFutureListener;
+import io.netty.channel.ChannelHandlerContext;
+import io.netty.channel.ChannelInboundHandlerAdapter;
+import io.netty.channel.ChannelInitializer;
+import io.netty.channel.ChannelPipeline;
+import io.netty.channel.DefaultFileRegion;
+import io.netty.channel.ChannelHandler.Sharable;
+import io.netty.channel.group.ChannelGroup;
+import io.netty.channel.group.DefaultChannelGroup;
+import io.netty.channel.nio.NioEventLoopGroup;
+import io.netty.channel.socket.nio.NioServerSocketChannel;
+import io.netty.channel.socket.nio.NioSocketChannel;
+import io.netty.handler.codec.TooLongFrameException;
+import io.netty.handler.codec.http.DefaultFullHttpResponse;
+import io.netty.handler.codec.http.DefaultHttpResponse;
+import io.netty.handler.codec.http.FullHttpResponse;
+import io.netty.handler.codec.http.HttpHeaders;
+import io.netty.handler.codec.http.HttpObjectAggregator;
+import io.netty.handler.codec.http.HttpRequest;
+import io.netty.handler.codec.http.HttpRequestDecoder;
+import io.netty.handler.codec.http.HttpResponse;
+import io.netty.handler.codec.http.HttpResponseEncoder;
+import io.netty.handler.codec.http.HttpResponseStatus;
+import io.netty.handler.codec.http.QueryStringDecoder;
+import io.netty.handler.stream.ChunkedWriteHandler;
+import io.netty.util.CharsetUtil;
+import io.netty.util.concurrent.GlobalEventExecutor;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -110,9 +112,13 @@ public class ShuffleHandler {
       Pattern.CASE_INSENSITIVE);
 
   private int port;
-  private final ChannelFactory selector;
-  private final ChannelGroup accepted = new DefaultChannelGroup();
-  protected HttpPipelineFactory pipelineFact;
+
+  // pipeline items
+  private Shuffle SHUFFLE;
+
+  private NioEventLoopGroup bossGroup;
+  private NioEventLoopGroup workerGroup;
+  private final ChannelGroup accepted = new DefaultChannelGroup(GlobalEventExecutor.INSTANCE);
   private final Configuration conf;
 
   private final ConcurrentMap registeredApps = new ConcurrentHashMap();
@@ -171,17 +177,23 @@ public ShuffleHandler(Configuration conf) {
       maxShuffleThreads = 2 * Runtime.getRuntime().availableProcessors();
     }
 
-    ThreadFactory bossFactory = new ThreadFactoryBuilder()
-        .setNameFormat("ShuffleHandler Netty Boss #%d")
-        .build();
-    ThreadFactory workerFactory = new ThreadFactoryBuilder()
-        .setNameFormat("ShuffleHandler Netty Worker #%d")
-        .build();
-
-    selector = new NioServerSocketChannelFactory(
-        Executors.newCachedThreadPool(bossFactory),
-        Executors.newCachedThreadPool(workerFactory),
-        maxShuffleThreads);
+    final String BOSS_THREAD_NAME_PREFIX = "ShuffleHandler Netty Boss #";
+    AtomicInteger bossThreadCounter = new AtomicInteger(0);
+    bossGroup = new NioEventLoopGroup(maxShuffleThreads, new ThreadFactory() {
+      @Override
+      public Thread newThread(Runnable r) {
+        return new Thread(r, BOSS_THREAD_NAME_PREFIX + bossThreadCounter.incrementAndGet());
+      }
+    });
+
+    final String WORKER_THREAD_NAME_PREFIX = "ShuffleHandler Netty Worker #";
+    AtomicInteger workerThreadCounter = new AtomicInteger(0);
+    workerGroup = new NioEventLoopGroup(maxShuffleThreads, new ThreadFactory() {
+      @Override
+      public Thread newThread(Runnable r) {
+        return new Thread(r, WORKER_THREAD_NAME_PREFIX + workerThreadCounter.incrementAndGet());
+      }
+    });
 
     connectionKeepAliveEnabled =
         conf.getBoolean(SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED,
@@ -199,22 +211,44 @@
 
   public void start() throws Exception {
-    ServerBootstrap bootstrap = new ServerBootstrap(selector);
-    try {
-      pipelineFact = new HttpPipelineFactory(conf);
-    } catch (Exception ex) {
-      throw new RuntimeException(ex);
-    }
-    bootstrap.setPipelineFactory(pipelineFact);
+    ServerBootstrap bootstrap = new ServerBootstrap()
+        .channel(NioServerSocketChannel.class)
+        .group(bossGroup, workerGroup)
+        .localAddress(port);
+    initPipeline(bootstrap, conf);
     port = conf.getInt(SHUFFLE_PORT_CONFIG_KEY, DEFAULT_SHUFFLE_PORT);
-    Channel ch = bootstrap.bind(new InetSocketAddress(port));
+    Channel ch = bootstrap.bind().sync().channel();
     accepted.add(ch);
-    port = ((InetSocketAddress)ch.getLocalAddress()).getPort();
+    port = ((InetSocketAddress)ch.localAddress()).getPort();
     conf.set(SHUFFLE_PORT_CONFIG_KEY, Integer.toString(port));
-    pipelineFact.SHUFFLE.setPort(port);
+    SHUFFLE.setPort(port);
     LOG.info("TezShuffleHandler" + " listening on port " + port);
   }
 
+  private void initPipeline(ServerBootstrap bootstrap, Configuration conf) throws Exception {
+    SHUFFLE = getShuffle(conf);
+
+    if (conf.getBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY,
+        MRConfig.SHUFFLE_SSL_ENABLED_DEFAULT)) {
+      throw new UnsupportedOperationException(
+          "SSL Shuffle is not currently supported for the test shuffle handler");
+    }
+
+    ChannelInitializer channelInitializer =
+        new ChannelInitializer() {
+          @Override
+          public void initChannel(NioSocketChannel ch) throws Exception {
+            ChannelPipeline pipeline = ch.pipeline();
+            pipeline.addLast("decoder", new HttpRequestDecoder());
+            pipeline.addLast("aggregator", new HttpObjectAggregator(1 << 16));
+            pipeline.addLast("encoder", new HttpResponseEncoder());
+            pipeline.addLast("chunking", new ChunkedWriteHandler());
+            pipeline.addLast("shuffle", SHUFFLE);
+          }
+        };
+    bootstrap.childHandler(channelInitializer);
+  }
+
   public static void initializeAndStart(Configuration conf) throws Exception {
     if (!initing.getAndSet(true)) {
       INSTANCE = new ShuffleHandler(conf);
@@ -245,15 +279,13 @@ public void unregisterApplication(String applicationIdString) {
     removeJobShuffleInfo(applicationIdString);
   }
 
-
   public void stop() throws Exception {
     accepted.close().awaitUninterruptibly(10, TimeUnit.SECONDS);
-    if (selector != null) {
-      ServerBootstrap bootstrap = new ServerBootstrap(selector);
-      bootstrap.releaseExternalResources();
+    if (bossGroup != null) {
+      bossGroup.shutdownGracefully();
     }
-    if (pipelineFact != null) {
-      pipelineFact.destroy();
+    if (workerGroup != null) {
+      workerGroup.shutdownGracefully();
     }
   }
 
@@ -261,7 +293,6 @@ protected Shuffle getShuffle(Configuration conf) {
     return new Shuffle(conf);
   }
 
-
   private void addJobToken(String appIdString, String user, Token jobToken) {
     String jobIdString = appIdString.replace("application", "job");
 
@@ -280,40 +311,8 @@ private void removeJobShuffleInfo(String appIdString) {
     userRsrc.remove(appIdString);
   }
 
-  class HttpPipelineFactory implements ChannelPipelineFactory {
-
-    final Shuffle SHUFFLE;
-
-    public HttpPipelineFactory(Configuration conf) throws Exception {
-      SHUFFLE = getShuffle(conf);
-      // TODO Setup SSL Shuffle
-      if (conf.getBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY,
-          MRConfig.SHUFFLE_SSL_ENABLED_DEFAULT)) {
-        throw new UnsupportedOperationException(
-            "SSL Shuffle is not currently supported for the test shuffle handler");
-      }
-    }
-
-    public void destroy() {
-    }
-
-    @Override
-    public ChannelPipeline getPipeline() throws Exception {
-      ChannelPipeline pipeline = Channels.pipeline();
-      pipeline.addLast("decoder", new HttpRequestDecoder());
-      pipeline.addLast("aggregator", new HttpChunkAggregator(1 << 16));
-      pipeline.addLast("encoder", new HttpResponseEncoder());
-      pipeline.addLast("chunking", new ChunkedWriteHandler());
-      pipeline.addLast("shuffle", SHUFFLE);
-      return pipeline;
-      // TODO factor security manager into pipeline
-      // TODO factor out encode/decode to permit binary shuffle
-      // TODO factor out decode of index to permit alt. models
-    }
-
-  }
-
-  class Shuffle extends SimpleChannelUpstreamHandler {
+  @Sharable
+  class Shuffle extends ChannelInboundHandlerAdapter {
 
     private final Configuration conf;
     private final IndexCache indexCache;
@@ -343,37 +342,36 @@ private List splitMaps(List mapq) {
     }
 
     @Override
-    public void channelOpen(ChannelHandlerContext ctx, ChannelStateEvent evt)
+    public void channelActive(ChannelHandlerContext ctx)
         throws Exception {
+
       if ((maxShuffleConnections > 0) && (accepted.size() >= maxShuffleConnections)) {
-        LOG.info(String.format("Current number of shuffle connections (%d) is " + 
-            "greater than or equal to the max allowed shuffle connections (%d)", 
+        LOG.info(String.format("Current number of shuffle connections (%d) is " +
+            "greater than or equal to the max allowed shuffle connections (%d)",
            accepted.size(), maxShuffleConnections));
-        evt.getChannel().close();
+        ctx.channel().close();
         return;
       }
-      accepted.add(evt.getChannel());
-      super.channelOpen(ctx, evt);
-
+      accepted.add(ctx.channel());
+      super.channelActive(ctx);
     }
 
     @Override
-    public void messageReceived(ChannelHandlerContext ctx, MessageEvent evt)
+    public void channelRead(ChannelHandlerContext ctx, Object message)
         throws Exception {
-      HttpRequest request = (HttpRequest) evt.getMessage();
+      HttpRequest request = (HttpRequest) message;
       if (request.getMethod() != GET) {
         sendError(ctx, METHOD_NOT_ALLOWED);
         return;
       }
       // Check whether the shuffle version is compatible
       if (!ShuffleHeader.DEFAULT_HTTP_HEADER_NAME.equals(
-          request.getHeader(ShuffleHeader.HTTP_HEADER_NAME))
+          request.headers().get(ShuffleHeader.HTTP_HEADER_NAME))
          || !ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION.equals(
-              request.getHeader(ShuffleHeader.HTTP_HEADER_VERSION))) {
+              request.headers().get(ShuffleHeader.HTTP_HEADER_VERSION))) {
         sendError(ctx, "Incompatible shuffle request version", BAD_REQUEST);
       }
-      final Map<String, List<String>> q =
-          new QueryStringDecoder(request.getUri()).getParameters();
+      final Map<String, List<String>> q = new QueryStringDecoder(request.getUri()).parameters();
       final List keepAliveList = q.get("keepAlive");
       boolean keepAliveParam = false;
       if (keepAliveList != null && keepAliveList.size() == 1) {
@@ -432,7 +430,7 @@ public void channelRead(ChannelHandlerContext ctx, Object message)
       Map mapOutputInfoMap = new HashMap();
-      Channel ch = evt.getChannel();
+      Channel ch = ctx.channel();
       String user = userRsrc.get(jobId);
 
       // $x/$user/appcache/$appId/output/$mapId
@@ -444,13 +442,13 @@ public void channelRead(ChannelHandlerContext ctx, Object message)
         populateHeaders(mapIds, outputBasePathStr, user, reduceId, request,
            response, keepAliveParam, mapOutputInfoMap);
       } catch(IOException e) {
-        ch.write(response);
+        ch.writeAndFlush(response);
         LOG.error("Shuffle error in populating headers :", e);
         String errorMessage = getErrorMessage(e);
         sendError(ctx,errorMessage , INTERNAL_SERVER_ERROR);
         return;
       }
-      ch.write(response);
+      ch.writeAndFlush(response);
       // TODO refactor the following into the pipeline
       ChannelFuture lastMap = null;
       for (String mapId : mapIds) {
@@ -551,12 +549,12 @@ protected void setResponseHeaders(HttpResponse response,
         boolean keepAliveParam, long contentLength) {
       if (!connectionKeepAliveEnabled && !keepAliveParam) {
         LOG.info("Setting connection close header...");
-        response.setHeader(HttpHeaders.Names.CONNECTION, CONNECTION_CLOSE);
+        response.headers().set(HttpHeaders.Names.CONNECTION, CONNECTION_CLOSE);
       } else {
-        response.setHeader(HttpHeaders.Names.CONTENT_LENGTH,
+        response.headers().set(HttpHeaders.Names.CONTENT_LENGTH,
            String.valueOf(contentLength));
-        response.setHeader(HttpHeaders.Names.CONNECTION, HttpHeaders.Values.KEEP_ALIVE);
-        response.setHeader(HttpHeaders.Values.KEEP_ALIVE, "timeout="
+        response.headers().set(HttpHeaders.Names.CONNECTION, HttpHeaders.Values.KEEP_ALIVE);
+        response.headers().set(HttpHeaders.Values.KEEP_ALIVE, "timeout="
            + connectionKeepAliveTimeOut);
         LOG.info("Content Length in shuffle : " + contentLength);
       }
@@ -584,7 +582,7 @@ protected void verifyRequest(String appid, ChannelHandlerContext ctx,
       String enc_str = SecureShuffleUtils.buildMsgFrom(requestUri);
       // hash from the fetcher
       String urlHashStr =
-          request.getHeader(SecureShuffleUtils.HTTP_HEADER_URL_HASH);
+          request.headers().get(SecureShuffleUtils.HTTP_HEADER_URL_HASH);
       if (urlHashStr == null) {
         LOG.info("Missing header hash for " + appid);
         throw new IOException("fetcher cannot be authenticated");
@@ -600,11 +598,11 @@ protected void verifyRequest(String appid, ChannelHandlerContext ctx,
       String reply =
          SecureShuffleUtils.generateHash(urlHashStr.getBytes(Charsets.UTF_8),
              tokenSecret);
-      response.setHeader(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH, reply);
+      response.headers().set(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH, reply);
       // Put shuffle version into http header
-      response.setHeader(ShuffleHeader.HTTP_HEADER_NAME,
+      response.headers().set(ShuffleHeader.HTTP_HEADER_NAME,
          ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
-      response.setHeader(ShuffleHeader.HTTP_HEADER_VERSION,
+      response.headers().set(ShuffleHeader.HTTP_HEADER_VERSION,
          ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
       if (LOG.isDebugEnabled()) {
         int len = reply.length();
@@ -621,7 +619,7 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch,
          new ShuffleHeader(mapId, info.getPartLength(), info.getRawLength(), reduce);
       final DataOutputBuffer dob = new DataOutputBuffer();
       header.write(dob);
-      ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength()));
+      ch.writeAndFlush(wrappedBuffer(dob.getData(), 0, dob.getLength()));
       final File spillfile = new File(mapOutputInfo.mapOutputFileName.toString());
       RandomAccessFile spill;
@@ -634,15 +632,7 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch,
       ChannelFuture writeFuture;
       final DefaultFileRegion partition =
          new DefaultFileRegion(spill.getChannel(), info.getStartOffset(), info.getPartLength());
-      writeFuture = ch.write(partition);
-      writeFuture.addListener(new ChannelFutureListener() {
-        // TODO error handling; distinguish IO/connection failures,
-        // attribute to appropriate spill output
-        @Override
-        public void operationComplete(ChannelFuture future) {
-          partition.releaseExternalResources();
-        }
-      });
+      writeFuture = ch.writeAndFlush(partition);
       return writeFuture;
     }
 
@@ -653,25 +643,22 @@ protected void sendError(ChannelHandlerContext ctx,
 
     protected void sendError(ChannelHandlerContext ctx, String message,
        HttpResponseStatus status) {
-      HttpResponse response = new DefaultHttpResponse(HTTP_1_1, status);
-      response.setHeader(CONTENT_TYPE, "text/plain; charset=UTF-8");
+      FullHttpResponse response = new DefaultFullHttpResponse(HTTP_1_1, status);
+      response.headers().set(CONTENT_TYPE, "text/plain; charset=UTF-8");
       // Put shuffle version into http header
-      response.setHeader(ShuffleHeader.HTTP_HEADER_NAME,
+      response.headers().set(ShuffleHeader.HTTP_HEADER_NAME,
          ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
-      response.setHeader(ShuffleHeader.HTTP_HEADER_VERSION,
+      response.headers().set(ShuffleHeader.HTTP_HEADER_VERSION,
          ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
-      response.setContent(
-          ChannelBuffers.copiedBuffer(message, CharsetUtil.UTF_8));
+      response.content().writeBytes(Unpooled.copiedBuffer(message, CharsetUtil.UTF_8));
       // Close the connection as soon as the error message is sent.
-      ctx.getChannel().write(response).addListener(ChannelFutureListener.CLOSE);
+      ctx.channel().writeAndFlush(response).addListener(ChannelFutureListener.CLOSE);
     }
 
     @Override
-    public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e)
+    public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause)
        throws Exception {
-      Channel ch = e.getChannel();
-      Throwable cause = e.getCause();
       if (cause instanceof TooLongFrameException) {
         sendError(ctx, BAD_REQUEST);
         return;
@@ -688,8 +675,8 @@ public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e)
       }
 
       LOG.error("Shuffle error: ", cause);
-      if (ch.isConnected()) {
-        LOG.error("Shuffle error " + e);
+      if (ctx.channel().isActive()) {
+        LOG.error("Shuffle error", cause);
         sendError(ctx, INTERNAL_SERVER_ERROR);
       }
     }
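[Editor's note] The ShuffleHandler diff above is a mechanical Netty 3 to Netty 4 port: explicit boss/worker NioEventLoopGroups replace the NioServerSocketChannelFactory, a ChannelInitializer replaces ChannelPipelineFactory, and write() becomes writeAndFlush(). A condensed, runnable sketch of the new server wiring; handler names mirror the patch, the port and the empty shuffle step are placeholders:

    import io.netty.bootstrap.ServerBootstrap;
    import io.netty.channel.Channel;
    import io.netty.channel.ChannelInitializer;
    import io.netty.channel.ChannelPipeline;
    import io.netty.channel.nio.NioEventLoopGroup;
    import io.netty.channel.socket.nio.NioServerSocketChannel;
    import io.netty.channel.socket.nio.NioSocketChannel;
    import io.netty.handler.codec.http.HttpObjectAggregator;
    import io.netty.handler.codec.http.HttpRequestDecoder;
    import io.netty.handler.codec.http.HttpResponseEncoder;
    import io.netty.handler.stream.ChunkedWriteHandler;

    public class Netty4ServerSketch {
      public static void main(String[] args) throws Exception {
        NioEventLoopGroup boss = new NioEventLoopGroup(1);
        NioEventLoopGroup worker = new NioEventLoopGroup();
        try {
          ServerBootstrap bootstrap = new ServerBootstrap()
              .group(boss, worker)
              .channel(NioServerSocketChannel.class)
              .childHandler(new ChannelInitializer<NioSocketChannel>() {
                @Override
                protected void initChannel(NioSocketChannel ch) {
                  ChannelPipeline p = ch.pipeline();
                  p.addLast("decoder", new HttpRequestDecoder());
                  p.addLast("aggregator", new HttpObjectAggregator(1 << 16));
                  p.addLast("encoder", new HttpResponseEncoder());
                  p.addLast("chunking", new ChunkedWriteHandler());
                  // the shuffle handler itself would be added here
                }
              });
          Channel ch = bootstrap.bind(0).sync().channel(); // port 0: pick any free port
          ch.close().sync(); // a real server would block on closeFuture() instead
        } finally {
          boss.shutdownGracefully();
          worker.shutdownGracefully();
        }
      }
    }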
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/ExternalTezServiceTestHelper.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/ExternalTezServiceTestHelper.java
index 14c19b5676..c3831223c7 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/ExternalTezServiceTestHelper.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/ExternalTezServiceTestHelper.java
@@ -18,8 +18,8 @@
 
 import java.io.IOException;
 import java.util.Map;
+import java.util.Objects;
 
-import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -188,7 +188,7 @@ public FileSystem getRemoteFs() {
   }
 
   public TezClient getSharedTezClient() {
-    Preconditions.checkNotNull(sharedTezClient);
+    Objects.requireNonNull(sharedTezClient);
     return sharedTezClient;
   }
 }
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExtServicesWithLocalMode.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExtServicesWithLocalMode.java
index 3d8c08781b..c0bfe76bd4 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExtServicesWithLocalMode.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExtServicesWithLocalMode.java
@@ -20,6 +20,7 @@
 
 import static org.junit.Assert.assertEquals;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.Map;
 
@@ -59,6 +60,8 @@ public class TestExtServicesWithLocalMode {
 
   private static String TEST_ROOT_DIR = "target" + Path.SEPARATOR +
       TestExtServicesWithLocalMode.class.getName() + "-tmpDir";
+  private static final String STAGING_DIR = new File(System.getProperty("test.build.data"),
+      TestExtServicesWithLocalMode.class.getName()).getAbsolutePath();
 
   private static final Path SRC_DATA_DIR = new Path(TEST_ROOT_DIR + Path.SEPARATOR + "data");
   private static final Path HASH_JOIN_EXPECTED_RESULT_PATH =
@@ -92,6 +95,7 @@ public static void setup() throws Exception {
       confForJobs.set(entry.getKey(), entry.getValue());
     }
     confForJobs.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true);
+    confForJobs.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.toString());
   }
 
   @AfterClass
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
index 920534a598..c135d7a433 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/tests/TestExternalTezServices.java
@@ -200,7 +200,7 @@ private void runExceptionSimulation() throws IOException, TezException, Interrup
     DAGStatus dagStatus = dagClient.waitForCompletion();
     assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
     assertEquals(1, dagStatus.getDAGProgress().getFailedTaskAttemptCount());
-    assertEquals(1, dagStatus.getDAGProgress().getKilledTaskAttemptCount());
+    assertEquals(1, dagStatus.getDAGProgress().getRejectedTaskAttemptCount());
   }
 
diff --git a/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java b/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java
index 25d61d0bf5..a595210df3 100644
--- a/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java
+++ b/tez-ext-service-tests/src/test/java/org/apache/tez/util/ProtoConverters.java
@@ -37,7 +37,9 @@
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.TaskSpecProto;
 import org.apache.tez.test.service.rpc.TezTestServiceProtocolProtos.TaskSpecProto.Builder;
 
-public class ProtoConverters {
+public final class ProtoConverters {
+
+  private ProtoConverters() {}
 
   public static TaskSpec getTaskSpecfromProto(TaskSpecProto taskSpecProto) {
     TezTaskAttemptID taskAttemptID =
diff --git a/tez-mapreduce/pom.xml b/tez-mapreduce/pom.xml
index 24b443223e..d2d1c887b9 100644
--- a/tez-mapreduce/pom.xml
+++ b/tez-mapreduce/pom.xml
@@ -20,10 +20,14 @@
   <parent>
     <groupId>org.apache.tez</groupId>
     <artifactId>tez</artifactId>
-    <version>0.9.1-SNAPSHOT</version>
+    <version>0.10.5-SNAPSHOT</version>
   </parent>
   <artifactId>tez-mapreduce</artifactId>
 
+  <properties>
+    false
+  </properties>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.tez</groupId>
@@ -39,6 +43,10 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.tez</groupId>
+      <artifactId>hadoop-shim</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.tez</groupId>
       <artifactId>tez-runtime-library</artifactId>
@@ -84,10 +92,6 @@
     <dependency>
       <groupId>commons-lang</groupId>
       <artifactId>commons-lang</artifactId>
     </dependency>
-    <dependency>
-      <groupId>commons-collections</groupId>
-      <artifactId>commons-collections</artifactId>
-    </dependency>
     <dependency>
       <groupId>org.apache.tez</groupId>
       <artifactId>tez-runtime-internals</artifactId>
@@ -95,7 +99,7 @@
     <dependency>
       <groupId>org.mockito</groupId>
-      <artifactId>mockito-all</artifactId>
+      <artifactId>mockito-core</artifactId>
       <scope>test</scope>
     </dependency>
@@ -120,6 +124,10 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-collections4</artifactId>
+    </dependency>
     <dependency>
       <groupId>com.google.protobuf</groupId>
       <artifactId>protobuf-java</artifactId>
@@ -133,28 +141,26 @@
       <plugin>
         <groupId>org.apache.rat</groupId>
         <artifactId>apache-rat-plugin</artifactId>
       </plugin>
       <plugin>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-maven-plugins</artifactId>
+        <groupId>com.github.os72</groupId>
+        <artifactId>protoc-jar-maven-plugin</artifactId>
         <executions>
           <execution>
-            <id>compile-protoc</id>
             <phase>generate-sources</phase>
             <goals>
-              <goal>protoc</goal>
+              <goal>run</goal>
            </goals>
            <configuration>
-              <protocVersion>${protobuf.version}</protocVersion>
+              <protocArtifact>com.google.protobuf:protoc:${protobuf.version}</protocArtifact>
               <protocCommand>${protoc.path}</protocCommand>
-              <imports>
-                <param>${basedir}/src/main/proto</param>
-              </imports>
-              <source>
-                <directory>${basedir}/src/main/proto</directory>
-                <includes>
-                  <include>MRRuntimeProtos.proto</include>
-                </includes>
-              </source>
-              <output>${project.build.directory}/generated-sources/java</output>
+              <addSources>none</addSources>
+              <inputDirectories>
+                <include>${basedir}/src/main/proto</include>
+              </inputDirectories>
+              <outputTargets>
+                <outputTarget>
+                  <type>java</type>
+                  <outputDirectory>${project.build.directory}/generated-sources/java</outputDirectory>
+                </outputTarget>
+              </outputTargets>
             </configuration>
           </execution>
         </executions>
       </plugin>
diff --git a/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezGroupedSplitsInputFormat.java b/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezGroupedSplitsInputFormat.java
index e082e3a3ab..bdeba2a0a8 100644
--- a/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezGroupedSplitsInputFormat.java
+++ b/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezGroupedSplitsInputFormat.java
@@ -19,7 +19,17 @@
 package org.apache.hadoop.mapred.split;
 
 import java.io.IOException;
+import java.util.Objects;
+import java.util.concurrent.BlockingDeque;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
 
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.tez.mapreduce.grouper.TezSplitGrouper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
@@ -34,7 +44,7 @@
 import org.apache.tez.common.ReflectionUtils;
 import org.apache.tez.dag.api.TezException;
 
-import com.google.common.base.Preconditions;
+import org.apache.tez.common.Preconditions;
 
 /**
  * An InputFormat that provides a generic grouping around the splits
@@ -66,27 +76,19 @@ public void setInputFormat(InputFormat wrappedInputFormat) {
   }
 
   public void setSplitSizeEstimator(SplitSizeEstimator estimator) {
-    Preconditions.checkArgument(estimator != null);
-    this.estimator = estimator;
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Split size estimator : " + estimator);
-    }
+    this.estimator = Objects.requireNonNull(estimator);
+    LOG.debug("Split size estimator : {}", estimator);
   }
 
   public void setSplitLocationProvider(SplitLocationProvider locationProvider) {
-    Preconditions.checkArgument(locationProvider != null);
-    this.locationProvider = locationProvider;
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Split size location provider: " + locationProvider);
-    }
+    this.locationProvider = Objects.requireNonNull(locationProvider);
+    LOG.debug("Split size location provider: {}", locationProvider);
   }
 
   public void setDesiredNumberOfSplits(int num) {
     Preconditions.checkArgument(num >= 0);
     this.desiredNumSplits = num;
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("desiredNumSplits: " + desiredNumSplits);
-    }
+    LOG.debug("desiredNumSplits: {}", desiredNumSplits);
   }
 
   @Override
@@ -136,14 +138,69 @@ public class TezGroupedSplitsRecordReader implements RecordReader {
 
     int idx = 0;
     long progress;
     RecordReader curReader;
-
+    private final AtomicInteger initIndex;
+    private final int numReaders;
+    private ExecutorService initReaderExecService;
+    private BlockingDeque<Future<RecordReader<K, V>>> initedReaders;
+    private AtomicBoolean failureOccurred = new AtomicBoolean(false);
+
     public TezGroupedSplitsRecordReader(TezGroupedSplit split, JobConf job,
         Reporter reporter) throws IOException {
       this.groupedSplit = split;
      this.job = job;
      this.reporter = reporter;
+      this.initIndex = new AtomicInteger(0);
+      int numThreads = conf.getInt(TezSplitGrouper.TEZ_GROUPING_SPLIT_INIT_THREADS,
+          TezSplitGrouper.TEZ_GROUPING_SPLIT_INIT_THREADS_DEFAULT);
+      this.numReaders = Math.min(groupedSplit.wrappedSplits.size(),
+          conf.getInt(TezSplitGrouper.TEZ_GROUPING_SPLIT_INIT_RECORDREADERS,
+              TezSplitGrouper.TEZ_GROUPING_SPLIT_INIT_RECORDREADERS_DEFAULT));
+      // init the async split opening executor service if numReaders are greater than 1
+      if (numReaders > 1) {
+        this.initReaderExecService = Executors.newFixedThreadPool(numThreads,
+            new ThreadFactoryBuilder()
+                .setDaemon(true)
+                .setPriority(Thread.MAX_PRIORITY)
+                .setNameFormat("TEZ-Split-Init-Thread-%d")
+                .build());
+        this.initedReaders = new LinkedBlockingDeque<>();
+      }
       initNextRecordReader();
     }
+
+    private void preInitReaders() {
+      if (initReaderExecService == null) {
+        return;
+      }
+      for (int i = 0; i < numReaders; i++) {
+        initedReaders.offer(this.initReaderExecService.submit(() -> {
+          if (failureOccurred.get()) {
+            return null;
+          }
+          try {
+            int index = initIndex.getAndIncrement();
+            if (index >= groupedSplit.wrappedSplits.size()) {
+              return null;
+            }
+            InputSplit s = groupedSplit.wrappedSplits.get(index);
+            RecordReader reader = wrappedInputFormat.getRecordReader(s, job, reporter);
+            LOG.debug("Init Thread processed reader number {} initialization", index);
+            return reader;
+          } catch (Exception e) {
+            failureOccurred.set(true);
+            if (e instanceof InterruptedException) {
+              Thread.currentThread().interrupt();
+            }
+            cancelFutures();
+            throw new RuntimeException(e);
+          }
+        }));
+      }
+    }
+
+    public RecordReader getCurReader() {
+      return curReader;
+    }
 
     @Override
     public boolean next(K key, V value) throws IOException {
@@ -178,7 +235,7 @@ public void close() throws IOException {
         curReader = null;
       }
     }
-
+
     protected boolean initNextRecordReader() throws IOException {
       if (curReader != null) {
         curReader.close();
@@ -190,23 +247,45 @@ protected boolean initNextRecordReader() throws IOException {
 
       // if all chunks have been processed, nothing more to do.
       if (idx == groupedSplit.wrappedSplits.size()) {
+        if (initReaderExecService != null) {
+          LOG.info("Shutting down the init record reader threadpool");
+          initReaderExecService.shutdownNow();
+        }
         return false;
       }
 
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Init record reader for index " + idx + " of " + 
+        LOG.debug("Init record reader for index " + idx + " of " +
            groupedSplit.wrappedSplits.size());
       }
 
       // get a record reader for the idx-th chunk
       try {
-        curReader = wrappedInputFormat.getRecordReader(
-            groupedSplit.wrappedSplits.get(idx), job, reporter);
+        // get the cur reader directly when async split opening is disabled
+        if (initReaderExecService == null) {
+          curReader = wrappedInputFormat.getRecordReader(groupedSplit.wrappedSplits.get(idx), job, reporter);
+        } else {
+          preInitReaders();
+          curReader = initedReaders.take().get();
+        }
       } catch (Exception e) {
-        throw new RuntimeException (e);
+        failureOccurred.set(true);
+        if (e instanceof InterruptedException) {
+          Thread.currentThread().interrupt();
+        }
+        if (initedReaders != null) {
+          cancelFutures();
+        }
+        throw new RuntimeException(e);
       }
       idx++;
-      return true;
+      return curReader != null;
+    }
+
+    private void cancelFutures() {
+      for (Future<RecordReader<K, V>> f : initedReaders) {
+        f.cancel(true);
+      }
     }
 
     @Override
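[Editor's note] The record-reader changes above overlap split initialization with consumption: a small daemon pool opens up to TEZ_GROUPING_SPLIT_INIT_RECORDREADERS readers ahead of time, and initNextRecordReader() drains them in submission order. A stripped-down sketch of the same producer/consumer shape, where Reader stands in for the wrapped RecordReader and all names are illustrative:

    import java.util.concurrent.BlockingDeque;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.LinkedBlockingDeque;
    import java.util.concurrent.atomic.AtomicInteger;

    public class PreInitSketch {
      interface Reader { String read(); }

      public static void main(String[] args) throws Exception {
        int numSplits = 5;
        AtomicInteger nextIndex = new AtomicInteger(0);
        ExecutorService pool = Executors.newFixedThreadPool(2);
        BlockingDeque<Future<Reader>> inited = new LinkedBlockingDeque<>();
        // Enqueue initialization work; each task opens the split at the next index.
        for (int i = 0; i < numSplits; i++) {
          inited.offer(pool.submit(() -> {
            int idx = nextIndex.getAndIncrement();
            return idx < numSplits ? (Reader) () -> "split-" + idx : null;
          }));
        }
        // Consume readers in submission order, as initNextRecordReader() does.
        Future<Reader> f;
        while ((f = inited.poll()) != null) {
          Reader r = f.get();
          if (r != null) System.out.println(r.read());
        }
        pool.shutdownNow();
      }
    }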
diff --git a/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezMapredSplitsGrouper.java b/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezMapredSplitsGrouper.java
index 2bfccfa9df..38a213ec9a 100644
--- a/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezMapredSplitsGrouper.java
+++ b/tez-mapreduce/src/main/java/org/apache/hadoop/mapred/split/TezMapredSplitsGrouper.java
@@ -21,9 +21,9 @@
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Objects;
 
 import com.google.common.base.Function;
-import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import org.apache.tez.mapreduce.grouper.GroupedSplitContainer;
 import org.apache.tez.mapreduce.grouper.MapredSplitContainer;
@@ -66,7 +66,7 @@ public InputSplit[] getGroupedSplits(Configuration conf,
   public InputSplit[] getGroupedSplits(Configuration conf,
       InputSplit[] originalSplits, int desiredNumSplits,
       String wrappedInputFormatName, SplitSizeEstimator estimator,
      SplitLocationProvider locationProvider) throws IOException {
-    Preconditions.checkArgument(originalSplits != null, "Splits must be specified");
+    Objects.requireNonNull(originalSplits, "Splits must be specified");
     List originalSplitContainers = Lists.transform(Arrays.asList(originalSplits),
         new Function() {
diff --git a/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/SplitMetaInfoReaderTez.java b/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/SplitMetaInfoReaderTez.java
index 0d703e073b..d69d21127b 100644
--- a/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/SplitMetaInfoReaderTez.java
+++ b/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/SplitMetaInfoReaderTez.java
@@ -33,6 +33,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
+import org.apache.hadoop.util.functional.FutureIO;
 import org.apache.tez.common.MRFrameworkConfigs;
 import org.apache.tez.mapreduce.hadoop.MRJobConfig;
 
@@ -41,31 +42,28 @@
  */
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
-public class SplitMetaInfoReaderTez {
+public final class SplitMetaInfoReaderTez {
 
   public static final Logger LOG = LoggerFactory.getLogger(SplitMetaInfoReaderTez.class);
 
   public static final int META_SPLIT_VERSION = JobSplit.META_SPLIT_VERSION;
   public static final byte[] META_SPLIT_FILE_HEADER = JobSplit.META_SPLIT_FILE_HEADER;
 
+  private SplitMetaInfoReaderTez() {}
 
-  // Forked from the MR variant so that the metaInfo file as well as the split
-  // file can be read from local fs - relying on these files being localized.
-  public static TaskSplitMetaInfo[] readSplitMetaInfo(Configuration conf,
+  private static FSDataInputStream getFSDataIS(Configuration conf,
       FileSystem fs) throws IOException {
-
     long maxMetaInfoSize = conf.getLong(
         MRJobConfig.SPLIT_METAINFO_MAXSIZE,
         MRJobConfig.DEFAULT_SPLIT_METAINFO_MAXSIZE);
-
+    FSDataInputStream in = null;
     // TODO NEWTEZ Figure out how this can be improved. i.e. access from context instead of setting in conf ?
     String basePath = conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, ".");
     LOG.info("Attempting to find splits in dir: " + basePath);
-
+
     Path metaSplitFile = new Path(
         basePath,
         MRJobConfig.JOB_SPLIT_METAINFO);
-    String jobSplitFile = MRJobConfig.JOB_SPLIT;
 
     File file = new File(metaSplitFile.toUri().getPath()).getAbsoluteFile();
     if (LOG.isDebugEnabled()) {
@@ -74,34 +72,95 @@ public static TaskSplitMetaInfo[] readSplitMetaInfo(Configuration conf,
           + FileSystem.getDefaultUri(conf));
     }
 
-    FileStatus fStatus = fs.getFileStatus(metaSplitFile);
-    if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
-      throw new IOException("Split metadata size exceeded " + maxMetaInfoSize
-          + ". Aborting job ");
+    FileStatus fStatus;
+    try {
+      fStatus = fs.getFileStatus(metaSplitFile);
+      if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
+        throw new IOException("Split metadata size exceeded " + maxMetaInfoSize
+            + ". Aborting job ");
+      }
+      in = FutureIO.awaitFuture(fs.openFile(metaSplitFile).withFileStatus(fStatus).build());
+      byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
+      in.readFully(header);
+      if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
+        throw new IOException("Invalid header on split file");
+      }
+      int vers = WritableUtils.readVInt(in);
+      if (vers != JobSplit.META_SPLIT_VERSION) {
+        throw new IOException("Unsupported split version " + vers);
+      }
+    } catch (IOException e) {
+      if (in != null) {
+        in.close();
+      }
+      throw e;
     }
-    FSDataInputStream in = fs.open(metaSplitFile);
-    byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
-    in.readFully(header);
-    if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
-      throw new IOException("Invalid header on split file");
-    }
-    int vers = WritableUtils.readVInt(in);
-    if (vers != JobSplit.META_SPLIT_VERSION) {
-      in.close();
-      throw new IOException("Unsupported split version " + vers);
+    return in;
+  }
+
+  // Forked from the MR variant so that the metaInfo file as well as the split
+  // file can be read from local fs - relying on these files being localized.
+  public static TaskSplitMetaInfo[] readSplitMetaInfo(Configuration conf,
+      FileSystem fs) throws IOException {
+    FSDataInputStream in = null;
+    try {
+      in = getFSDataIS(conf, fs);
+      final String jobSplitFile = MRJobConfig.JOB_SPLIT;
+      final String basePath = conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, ".");
+      int numSplits = WritableUtils.readVInt(in); // TODO: check for insane values
+      JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = new JobSplit.TaskSplitMetaInfo[numSplits];
+      for (int i = 0; i < numSplits; i++) {
+        JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
+        splitMetaInfo.readFields(in);
+        JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
+            new Path(basePath, jobSplitFile)
+                .toUri().toString(), splitMetaInfo.getStartOffset());
+        allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex,
+            splitMetaInfo.getLocations(), splitMetaInfo.getInputDataLength());
+      }
+      return allSplitMetaInfo;
+    } finally {
+      if (in != null) {
+        in.close();
+      }
     }
-    int numSplits = WritableUtils.readVInt(in); // TODO: check for insane values
-    JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = new JobSplit.TaskSplitMetaInfo[numSplits];
-    for (int i = 0; i < numSplits; i++) {
+  }
+
+  /**
+   * Get the split meta info for the task with a specific index. This method
+   * reduces the overhead of creating meta objects below the index of the task.
+   *
+   * @param conf job configuration.
+   * @param fs FileSystem.
+   * @param index the index of the task.
+   * @return split meta info object of the task.
+   */
+  public static TaskSplitMetaInfo getSplitMetaInfo(Configuration conf,
+      FileSystem fs, int index) throws IOException {
+    FSDataInputStream in = null;
+    try {
+      in = getFSDataIS(conf, fs);
+      final String jobSplitFile = MRJobConfig.JOB_SPLIT;
+      final String basePath =
+          conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, ".");
+      final int numSplits = WritableUtils.readVInt(in); // TODO: check for insane values
+      if (numSplits <= index) {
+        throw new IOException("Index is larger than the number of splits");
+      }
       JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
-      splitMetaInfo.readFields(in);
+      int iter = 0;
+      while (iter++ <= index) {
+        splitMetaInfo.readFields(in);
+      }
       JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(
           new Path(basePath, jobSplitFile)
               .toUri().toString(), splitMetaInfo.getStartOffset());
-      allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex,
+      return new JobSplit.TaskSplitMetaInfo(splitIndex,
          splitMetaInfo.getLocations(), splitMetaInfo.getInputDataLength());
+    } finally {
+      if (in != null) {
+        in.close();
+      }
     }
-    in.close();
-    return allSplitMetaInfo;
   }
 }
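[Editor's note] getSplitMetaInfo() above avoids materializing metadata for every task: it reads the sequential metainfo records into one reusable object until it reaches the requested index, so skipped entries produce no extra garbage. The same skip-ahead trick in miniature, over fixed-size records:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    public class SkipReadSketch {
      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bytes)) {
          for (int i = 0; i < 4; i++) out.writeLong(100L * i); // four "records"
        }
        int index = 2;
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
          long record = 0;
          int iter = 0;
          while (iter++ <= index) {
            record = in.readLong(); // overwrite the same slot until we land on 'index'
          }
          System.out.println(record); // 200
        }
      }
    }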
generic grouping around @@ -70,25 +71,17 @@ public void setInputFormat(InputFormat wrappedInputFormat) { public void setDesiredNumberOfSplits(int num) { Preconditions.checkArgument(num >= 0); this.desiredNumSplits = num; - if (LOG.isDebugEnabled()) { - LOG.debug("desiredNumSplits: " + desiredNumSplits); - } + LOG.debug("desiredNumSplits: {}", desiredNumSplits); } public void setSplitSizeEstimator(SplitSizeEstimator estimator) { - Preconditions.checkArgument(estimator != null); - this.estimator = estimator; - if (LOG.isDebugEnabled()) { - LOG.debug("Split size estimator : " + estimator); - } + this.estimator = Objects.requireNonNull(estimator); + LOG.debug("Split size estimator : {}", estimator); } public void setSplitLocationProvider(SplitLocationProvider locationProvider) { - Preconditions.checkArgument(locationProvider != null); - this.locationProvider = locationProvider; - if (LOG.isDebugEnabled()) { - LOG.debug("Split location provider : " + locationProvider); - } + this.locationProvider = Objects.requireNonNull(locationProvider); + LOG.debug("Split location provider : {}", locationProvider); } @Override diff --git a/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/TezMapReduceSplitsGrouper.java b/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/TezMapReduceSplitsGrouper.java index b36d11d12b..4f638e1400 100644 --- a/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/TezMapReduceSplitsGrouper.java +++ b/tez-mapreduce/src/main/java/org/apache/hadoop/mapreduce/split/TezMapReduceSplitsGrouper.java @@ -20,11 +20,11 @@ import java.io.IOException; import java.util.List; +import java.util.Objects; import javax.annotation.Nullable; import com.google.common.base.Function; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.tez.mapreduce.grouper.GroupedSplitContainer; import org.apache.tez.mapreduce.grouper.MapReduceSplitContainer; @@ -154,7 +154,7 @@ public List getGroupedSplits(Configuration conf, SplitSizeEstimator estimator, SplitLocationProvider locationProvider) throws IOException, InterruptedException { - Preconditions.checkArgument(originalSplits != null, "Splits must be specified"); + Objects.requireNonNull(originalSplits, "Splits must be specified"); List originalSplitContainers = Lists.transform(originalSplits, new Function() { @Override diff --git a/tez-mapreduce/src/main/java/org/apache/tez/client/MRTezClient.java b/tez-mapreduce/src/main/java/org/apache/tez/client/MRTezClient.java index 86089e9a67..1057932e1d 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/client/MRTezClient.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/client/MRTezClient.java @@ -31,7 +31,6 @@ import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.api.client.DAGClient; -import org.apache.tez.dag.api.client.MRDAGClient; @InterfaceAudience.Private public class MRTezClient extends TezClient { @@ -46,9 +45,4 @@ public DAGClient submitDAGApplication(ApplicationId appId, org.apache.tez.dag.ap throws TezException, IOException { return super.submitDAGApplication(appId, dag); } - - public static MRDAGClient getDAGClient(ApplicationId appId, TezConfiguration tezConf, FrameworkClient frameworkClient) - throws IOException, TezException { - return new MRDAGClient(TezClient.getDAGClient(appId, tezConf, frameworkClient)); - } } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/common/MRFrameworkConfigs.java 
b/tez-mapreduce/src/main/java/org/apache/tez/common/MRFrameworkConfigs.java index 6831b52ec4..4cbb28530d 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/common/MRFrameworkConfigs.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/common/MRFrameworkConfigs.java @@ -23,7 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience; @InterfaceAudience.Private -public class MRFrameworkConfigs { +public final class MRFrameworkConfigs { /** * One local dir for the specific job. @@ -38,4 +38,6 @@ public class MRFrameworkConfigs { public static final String TASK_LOCAL_RESOURCE_DIR_DEFAULT = "/tmp"; public static final String JOB_LOCAL_DIR = MR_FRAMEWORK_PREFIX + "job.local.dir"; + + private MRFrameworkConfigs() {} } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/dag/api/client/MRDAGClient.java b/tez-mapreduce/src/main/java/org/apache/tez/dag/api/client/MRDAGClient.java index 42b52e03ec..c2646bd819 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/dag/api/client/MRDAGClient.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/dag/api/client/MRDAGClient.java @@ -85,6 +85,11 @@ public DAGStatus waitForCompletion() throws IOException, TezException, Interrupt return realClient.waitForCompletion(); } + @Override + public DAGStatus waitForCompletion(long timeMs) throws IOException, TezException, InterruptedException { + return realClient.waitForCompletion(timeMs); + } + @Override public DAGStatus waitForCompletionWithStatusUpdates( @Nullable Set statusGetOpts) throws IOException, TezException, InterruptedException { @@ -101,4 +106,10 @@ public DAGStatus getDAGStatus(@Nullable Set statusOptions, long timeout) throws IOException, TezException { return getDAGStatus(statusOptions); } + + @Override + public String getWebUIAddress() throws IOException, TezException { + throw new TezException("MRDAGClient.getWebUIAddress is not supported"); + } + } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/client/NotRunningJob.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/client/NotRunningJob.java index e178948041..8837df6818 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/client/NotRunningJob.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/client/NotRunningJob.java @@ -64,8 +64,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; -import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -84,11 +82,16 @@ private ApplicationReport getUnknownApplicationReport() { ApplicationAttemptId unknownAttemptId = recordFactory .newRecordInstance(ApplicationAttemptId.class); - // Setting AppState to NEW and finalStatus to UNDEFINED as they are never - // used for a non running job - return ApplicationReport.newInstance(unknownAppId, unknownAttemptId, "N/A", - "N/A", "N/A", "N/A", 0, null, YarnApplicationState.NEW, "N/A", "N/A", - 0, 0, FinalApplicationStatus.UNDEFINED, null, "N/A", 0.0f, "TEZ_MRR", null); + ApplicationReport report = recordFactory.newRecordInstance(ApplicationReport.class); + report.setApplicationId(unknownAppId); + report.setCurrentApplicationAttemptId(unknownAttemptId); + report.setUser("N/A"); + report.setName("N/A"); + report.setDiagnostics("N/A"); + report.setTrackingUrl("N/A"); + 
report.setStartTime(0); + report.setFinishTime(0); + return report; } NotRunningJob(ApplicationReport applicationReport, JobState jobState) { diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/client/YARNRunner.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/client/YARNRunner.java index 00a68cd998..7aed4a04a8 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/client/YARNRunner.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/client/YARNRunner.java @@ -148,7 +148,7 @@ public class YARNRunner implements ClientProtocol { * @param conf the configuration object for the client */ public YARNRunner(Configuration conf) { - this(conf, new ResourceMgrDelegate(new YarnConfiguration(conf))); + this(conf, new ResourceMgrDelegate(new YarnConfiguration(conf))); } /** @@ -616,9 +616,7 @@ public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts) dagAMConf.set(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, javaOpts.toString()); if (envStr.length() > 0) { dagAMConf.set(TezConfiguration.TEZ_AM_LAUNCH_ENV, envStr); - if (LOG.isDebugEnabled()) { - LOG.debug("Setting MR AM env to : " + envStr); - } + LOG.debug("Setting MR AM env to : {}", envStr); } // Submit to ResourceManager @@ -641,7 +639,7 @@ public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts) tezClient = new MRTezClient("MapReduce", dagAMConf, false, jobLocalResources, ts); tezClient.start(); - tezClient.submitDAGApplication(appId, dag); + dagClient = new MRDAGClient(tezClient.submitDAGApplication(appId, dag)); tezClient.stop(); } catch (TezException e) { throw new IOException(e); @@ -704,9 +702,6 @@ public JobStatus getJobStatus(JobID jobID) throws IOException, String jobFile = MRApps.getJobFile(conf, user, jobID); DAGStatus dagStatus; try { - if(dagClient == null) { - dagClient = MRTezClient.getDAGClient(TypeConverter.toYarn(jobID).getAppId(), tezConf, null); - } dagStatus = dagClient.getDAGStatus(null); return new DAGJobStatus(dagClient.getApplicationReport(), dagStatus, jobFile); } catch (TezException e) { diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/combine/MRCombiner.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/combine/MRCombiner.java index 9514215e51..adfd24dda6 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/combine/MRCombiner.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/combine/MRCombiner.java @@ -20,6 +20,7 @@ import java.io.IOException; +import org.apache.hadoop.mapred.JobConf; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience.Public; @@ -78,7 +79,13 @@ public class MRCombiner implements Combiner { private final TaskAttemptID mrTaskAttemptID; public MRCombiner(TaskContext taskContext) throws IOException { - this.conf = TezUtils.createConfFromUserPayload(taskContext.getUserPayload()); + final Configuration userConf = TezUtils.createConfFromUserPayload(taskContext.getUserPayload()); + useNewApi = ConfigUtils.useNewApi(userConf); + if (useNewApi) { + conf = new JobConf(userConf); + } else { + conf = userConf; + } assert(taskContext instanceof InputContext || taskContext instanceof OutputContext); if (taskContext instanceof OutputContext) { @@ -93,8 +100,6 @@ public MRCombiner(TaskContext taskContext) throws IOException { this.reporter = new MRTaskReporter((InputContext)taskContext); } - this.useNewApi = ConfigUtils.useNewApi(conf); - combineInputRecordsCounter = 
taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS); combineOutputRecordsCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS); diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/committer/MROutputCommitter.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/committer/MROutputCommitter.java index b7ebc4c05d..4a648dc901 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/committer/MROutputCommitter.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/committer/MROutputCommitter.java @@ -18,6 +18,7 @@ package org.apache.tez.mapreduce.committer; +import org.apache.tez.mapreduce.common.Utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience.Public; @@ -78,6 +79,8 @@ public void initialize() throws IOException { jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials()); jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber()); + jobConf.set(MRJobConfig.JOB_COMMITTER_UUID, Utils.getDAGID(getContext())); + jobConf.setInt(MRJobConfig.VERTEX_ID, getContext().getVertexIndex()); committer = getOutputCommitter(getContext()); jobContext = getJobContextFromVertexContext(getContext()); initialized = true; diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/common/MRInputAMSplitGenerator.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/common/MRInputAMSplitGenerator.java index dbfdcb3843..d06a5f46a0 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/common/MRInputAMSplitGenerator.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/common/MRInputAMSplitGenerator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.split.TezMapReduceSplitsGrouper; import org.apache.hadoop.security.UserGroupInformation; import org.apache.tez.common.TezUtils; import org.apache.tez.dag.api.VertexLocationHint; @@ -80,8 +79,8 @@ public List initialize() throws Exception { + sw.now(TimeUnit.MILLISECONDS)); } sw.reset().start(); - Configuration conf = TezUtils.createConfFromByteString(userPayloadProto - .getConfigurationBytes()); + Configuration conf = new JobConf(getContext().getVertexConfiguration()); + TezUtils.addToConfFromByteString(conf, userPayloadProto.getConfigurationBytes()); sendSerializedEvents = conf.getBoolean( MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/common/Utils.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/common/Utils.java index 42c68bd121..85483fc598 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/common/Utils.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/common/Utils.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; @@ -29,12 +30,15 @@ import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.mapred.Counters.Counter; import org.apache.tez.common.counters.TezCounter; +import org.apache.tez.dag.records.TezDAGID; import org.apache.tez.mapreduce.hadoop.mapred.MRCounters; - -import com.google.common.base.Preconditions; +import 
org.apache.tez.runtime.api.OutputCommitterContext; +import org.apache.tez.runtime.api.OutputContext; @Private -public class Utils { +public final class Utils { + + private Utils() {} /** * Gets a handle to the Statistics instance based on the scheme associated @@ -47,7 +51,7 @@ public class Utils { */ @Private public static List getFsStatistics(Path path, Configuration conf) throws IOException { - List matchedStats = new ArrayList(); + List matchedStats = new ArrayList<>(); path = path.getFileSystem(conf).makeQualified(path); String scheme = path.toUri().getScheme(); for (Statistics stats : FileSystem.getAllStatistics()) { @@ -59,8 +63,15 @@ public static List getFsStatistics(Path path, Configuration conf) th } public static Counter getMRCounter(TezCounter tezCounter) { - Preconditions.checkNotNull(tezCounter); + Objects.requireNonNull(tezCounter); return new MRCounters.MRCounter(tezCounter); } - + + public static String getDAGID(OutputCommitterContext context) { + return TezDAGID.getInstance(context.getApplicationId(), context.getDagIdentifier()).toString(); + } + + public static String getDAGID(OutputContext context) { + return TezDAGID.getInstance(context.getApplicationId(), context.getDagIdentifier()).toString(); + } } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/MapReduceSplitContainer.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/MapReduceSplitContainer.java index 63e2138ca8..fc6a424bd3 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/MapReduceSplitContainer.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/MapReduceSplitContainer.java @@ -15,8 +15,8 @@ package org.apache.tez.mapreduce.grouper; import java.io.IOException; +import java.util.Objects; -import com.google.common.base.Preconditions; import org.apache.hadoop.mapreduce.InputSplit; public class MapReduceSplitContainer extends SplitContainer { @@ -24,7 +24,7 @@ public class MapReduceSplitContainer extends SplitContainer { private final InputSplit inputSplit; public MapReduceSplitContainer(InputSplit inputSplit) { - Preconditions.checkNotNull(inputSplit); + Objects.requireNonNull(inputSplit); this.inputSplit = inputSplit; } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/MapredSplitContainer.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/MapredSplitContainer.java index f7dbfda491..ce86ad52ef 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/MapredSplitContainer.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/MapredSplitContainer.java @@ -15,8 +15,8 @@ package org.apache.tez.mapreduce.grouper; import java.io.IOException; +import java.util.Objects; -import com.google.common.base.Preconditions; import org.apache.hadoop.mapred.InputSplit; public class MapredSplitContainer extends SplitContainer { @@ -24,7 +24,7 @@ public class MapredSplitContainer extends SplitContainer { private final InputSplit inputSplit; public MapredSplitContainer(InputSplit inputSplit) { - Preconditions.checkNotNull(inputSplit); + Objects.requireNonNull(inputSplit); this.inputSplit = inputSplit; } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java index 26e5a9ed37..067acca9e6 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java +++ 
b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/grouper/TezSplitGrouper.java @@ -22,10 +22,12 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.TreeMap; -import com.google.common.base.Preconditions; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.tez.common.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.util.RackResolver; import org.apache.tez.dag.api.TezUncheckedException; @@ -101,6 +103,20 @@ public abstract class TezSplitGrouper { public static final String TEZ_GROUPING_NODE_LOCAL_ONLY = "tez.grouping.node.local.only"; public static final boolean TEZ_GROUPING_NODE_LOCAL_ONLY_DEFAULT = false; + /** + * Number of threads used to initialize the grouped splits, to asynchronously open the readers. + */ + public static final String TEZ_GROUPING_SPLIT_INIT_THREADS = "tez.grouping.split.init.threads"; + public static final int TEZ_GROUPING_SPLIT_INIT_THREADS_DEFAULT = 4; + + /** + * Number of record readers to asynchronously and proactively initialize. + * In order for upstream apps to use this feature, the objects created in the + * upstream apps as part of the TezGroupedSplitsRecordReader call should be thread safe. + */ + @InterfaceStability.Unstable + public static final String TEZ_GROUPING_SPLIT_INIT_RECORDREADERS = "tez.grouping.split.init.recordreaders"; + public static final int TEZ_GROUPING_SPLIT_INIT_RECORDREADERS_DEFAULT = 1; static class LocationHolder { List splits; @@ -166,7 +182,7 @@ public List getGroupedSplits(Configuration conf, SplitLocationProviderWrapper locationProvider) throws IOException, InterruptedException { LOG.info("Grouping splits in Tez"); - Preconditions.checkArgument(originalSplits != null, "Splits must be specified"); + Objects.requireNonNull(originalSplits, "Splits must be specified"); int configNumSplits = conf.getInt(TEZ_GROUPING_SPLIT_COUNT, 0); if (configNumSplits > 0) { @@ -192,10 +208,12 @@ public List getGroupedSplits(Configuration conf, long totalLength = 0; Map distinctLocations = createLocationsMap(conf); + Map splitToLocationsMap = new HashMap<>(originalSplits.size()); // go through splits and add them to locations for (SplitContainer split : originalSplits) { totalLength += estimator.getEstimatedSize(split); String[] locations = locationProvider.getPreferredLocations(split); + splitToLocationsMap.put(split, locations); if (locations == null || locations.length == 0) { locations = emptyLocations; allSplitsHaveLocalhost = false; @@ -235,36 +253,28 @@ public List getGroupedSplits(Configuration conf, "Invalid max/min group lengths. Required min>0, max>=min. " + " max: " + maxLengthPerGroup + " min: " + minLengthPerGroup); } + int newDesiredNumSplits = -1; if (lengthPerGroup > maxLengthPerGroup) { // splits too big to work. Need to override with max size. - int newDesiredNumSplits = (int)(totalLength/maxLengthPerGroup) + 1; - LOG.info("Desired splits: " + desiredNumSplits + " too small. " + - " Desired splitLength: " + lengthPerGroup + - " Max splitLength: " + maxLengthPerGroup + - " New desired splits: " + newDesiredNumSplits + - " Total length: " + totalLength + - " Original splits: " + originalSplits.size()); - - desiredNumSplits = newDesiredNumSplits; + newDesiredNumSplits = (int)(totalLength/maxLengthPerGroup) + 1; } else if (lengthPerGroup < minLengthPerGroup) { // splits too small to work. Need to override with size.
- int newDesiredNumSplits = (int)(totalLength/minLengthPerGroup) + 1; - /** - * This is a workaround for systems like S3 that pass the same - * fake hostname for all splits. - */ - if (!allSplitsHaveLocalhost) { - desiredNumSplits = newDesiredNumSplits; + newDesiredNumSplits = (int)(totalLength/minLengthPerGroup) + 1; + if (allSplitsHaveLocalhost) { + // Workaround for systems like S3 that pass the same fake hostname for all splits. + LOG.info("Ignoring {} configuration because all splits seem to be on localhost.", TEZ_GROUPING_SPLIT_MIN_SIZE); + newDesiredNumSplits = desiredNumSplits; } - - LOG.info("Desired splits: " + desiredNumSplits + " too large. " + - " Desired splitLength: " + lengthPerGroup + + } + if (newDesiredNumSplits != -1) { + LOG.info("Desired splitLength " + lengthPerGroup + " exceeds min/max bounds. " + " Min splitLength: " + minLengthPerGroup + - " New desired splits: " + newDesiredNumSplits + - " Final desired splits: " + desiredNumSplits + - " All splits have localhost: " + allSplitsHaveLocalhost + + " Max splitLength: " + maxLengthPerGroup + + " Desired splits: " + desiredNumSplits + + " New desired splits: " + newDesiredNumSplits + " Total length: " + totalLength + " Original splits: " + originalSplits.size()); + desiredNumSplits = newDesiredNumSplits; } } @@ -277,7 +287,7 @@ public List getGroupedSplits(Configuration conf, groupedSplits = new ArrayList(originalSplits.size()); for (SplitContainer split : originalSplits) { GroupedSplitContainer newSplit = - new GroupedSplitContainer(1, wrappedInputFormatName, cleanupLocations(locationProvider.getPreferredLocations(split)), + new GroupedSplitContainer(1, wrappedInputFormatName, cleanupLocations(splitToLocationsMap.get(split)), null); newSplit.addSplit(split); groupedSplits.add(newSplit); @@ -298,7 +308,7 @@ public List getGroupedSplits(Configuration conf, Set locSet = new HashSet(); for (SplitContainer split : originalSplits) { locSet.clear(); - String[] locations = locationProvider.getPreferredLocations(split); + String[] locations = splitToLocationsMap.get(split); if (locations == null || locations.length == 0) { locations = emptyLocations; } @@ -392,7 +402,7 @@ public List getGroupedSplits(Configuration conf, groupLocation = null; } else if (doingRackLocal) { for (SplitContainer splitH : group) { - String[] locations = locationProvider.getPreferredLocations(splitH); + String[] locations = splitToLocationsMap.get(splitH); if (locations != null) { for (String loc : locations) { if (loc != null) { @@ -487,7 +497,7 @@ public List getGroupedSplits(Configuration conf, } numRackSplitsToGroup--; rackSet.clear(); - String[] locations = locationProvider.getPreferredLocations(split); + String[] locations = splitToLocationsMap.get(split); if (locations == null || locations.length == 0) { locations = emptyLocations; } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/DeprecatedKeys.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/DeprecatedKeys.java index 9ae58c0fce..8712060546 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/DeprecatedKeys.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/DeprecatedKeys.java @@ -22,13 +22,12 @@ import java.util.HashMap; import java.util.Map; -import org.apache.hadoop.conf.Configuration; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import
org.apache.tez.runtime.library.common.Constants; -public class DeprecatedKeys { +public final class DeprecatedKeys { @@ -36,13 +35,13 @@ public class DeprecatedKeys { /** * Keys used by the DAG - mainly the AM. */ - private static Map mrParamToDAGParamMap = new HashMap(); + private static final Map MR_PARAM_TO_DAG_PARAM_MAP = new HashMap<>(); /** * Keys used by the Tez Runtime. */ - private static Map mrParamToTezRuntimeParamMap = - new HashMap(); + private static final Map MR_PARAM_TO_TEZ_RUNTIME_PARAM_MAP = + new HashMap<>(); @@ -52,20 +51,22 @@ public class DeprecatedKeys { addDeprecatedKeys(); } + private DeprecatedKeys() {} + private static void populateMRToDagParamMap() { // TODO Default value handling. - mrParamToDAGParamMap.put(MRJobConfig.MR_AM_TASK_LISTENER_THREAD_COUNT, + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.MR_AM_TASK_LISTENER_THREAD_COUNT, TezConfiguration.TEZ_AM_TASK_LISTENER_THREAD_COUNT); - mrParamToDAGParamMap.put(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, TezConfiguration.TEZ_AM_MAX_TASK_FAILURES_PER_NODE); - mrParamToDAGParamMap.put(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED); - mrParamToDAGParamMap.put( + MR_PARAM_TO_DAG_PARAM_MAP.put( MRJobConfig.MR_AM_IGNORE_BLACKLISTING_BLACKLISTED_NODE_PERECENT, TezConfiguration.TEZ_AM_NODE_BLACKLISTING_IGNORE_THRESHOLD); - mrParamToDAGParamMap.put(MRJobConfig.QUEUE_NAME, + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.QUEUE_NAME, TezConfiguration.TEZ_QUEUE_NAME); // Counter replacement will work in this manner, as long as TezCounters @@ -74,20 +75,24 @@ private static void populateMRToDagParamMap() { // may break. // Framework counters, like FILESYSTEM will likely be incompatible since // they enum key belongs to a different package. - mrParamToDAGParamMap.put(MRJobConfig.COUNTERS_MAX_KEY, + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.COUNTERS_MAX_KEY, TezConfiguration.TEZ_COUNTERS_MAX); - mrParamToDAGParamMap.put(MRJobConfig.COUNTER_GROUPS_MAX_KEY, + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.COUNTER_GROUPS_MAX_KEY, TezConfiguration.TEZ_COUNTERS_MAX_GROUPS); - mrParamToDAGParamMap.put(MRJobConfig.COUNTER_NAME_MAX_KEY, + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.COUNTER_NAME_MAX_KEY, TezConfiguration.TEZ_COUNTERS_COUNTER_NAME_MAX_LENGTH); - mrParamToDAGParamMap.put(MRJobConfig.COUNTER_GROUP_NAME_MAX_KEY, + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.COUNTER_GROUP_NAME_MAX_KEY, TezConfiguration.TEZ_COUNTERS_GROUP_NAME_MAX_LENGTH); - mrParamToDAGParamMap.put(MRJobConfig.TASK_TIMEOUT, + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.TASK_TIMEOUT, TezConfiguration.TASK_HEARTBEAT_TIMEOUT_MS); - mrParamToDAGParamMap.put(MRJobConfig.JOB_TAGS, + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.JOB_TAGS, TezConfiguration.TEZ_APPLICATION_TAGS); - mrParamToDAGParamMap.put(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, TezConfiguration.TEZ_USER_CLASSPATH_FIRST); + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.JOB_NAMENODES, + TezConfiguration.TEZ_JOB_FS_SERVERS); + MR_PARAM_TO_DAG_PARAM_MAP.put(MRJobConfig.JOB_NAMENODES_TOKEN_RENEWAL_EXCLUDE, + TezConfiguration.TEZ_JOB_FS_SERVERS_TOKEN_RENEWAL_EXCLUDE); } // TODO TEZAM4 Sometime, make sure this gets loaded by default. Instead of the current initialization in MRAppMaster, TezChild. 
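Note: the following is a minimal, hypothetical usage sketch, not part of the patch. It shows how the renamed MR_PARAM_TO_DAG_PARAM_MAP can be consumed through the public accessor getMRToDAGParamMap(); the helper class name and the copy-only-if-unset policy are illustrative assumptions.

import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.tez.mapreduce.hadoop.DeprecatedKeys;

// Hypothetical helper: copies MR job-level settings onto their Tez DAG/AM
// equivalents using the translation map maintained above.
public final class MRToTezDagConfCopier {
  private MRToTezDagConfCopier() {}

  public static void copyDagParams(Configuration mrConf, Configuration tezConf) {
    for (Map.Entry<String, String> e : DeprecatedKeys.getMRToDAGParamMap().entrySet()) {
      String value = mrConf.get(e.getKey());      // MR key, e.g. mapreduce.job.queuename
      if (value != null && tezConf.get(e.getValue()) == null) {
        tezConf.set(e.getValue(), value);         // Tez key, e.g. tez.queue.name
      }
    }
  }
}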
@@ -161,6 +166,16 @@ private static void populateMRToTezRuntimeParamMap() { registerMRToRuntimeKeyTranslation(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC); registerMRToRuntimeKeyTranslation(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, TezConfiguration.TEZ_USER_CLASSPATH_FIRST); + + registerMRToRuntimeKeyTranslation(MRJobConfig.RETRY_AFTER_NO_SPECULATE, TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_NO_SPECULATE); + + registerMRToRuntimeKeyTranslation(MRJobConfig.RETRY_AFTER_SPECULATE, TezConfiguration.TEZ_AM_SOONEST_RETRY_AFTER_SPECULATE); + + registerMRToRuntimeKeyTranslation(MRJobConfig.SPECULATIVECAP_RUNNING_TASKS, TezConfiguration.TEZ_AM_PROPORTION_RUNNING_TASKS_SPECULATABLE); + + registerMRToRuntimeKeyTranslation(MRJobConfig.SPECULATIVECAP_TOTAL_TASKS, TezConfiguration.TEZ_AM_PROPORTION_TOTAL_TASKS_SPECULATABLE); + + registerMRToRuntimeKeyTranslation(MRJobConfig.MINIMUM_ALLOWED_TASKS, TezConfiguration.TEZ_AM_MINIMUM_ALLOWED_SPECULATIVE_TASKS); } private static void addDeprecatedKeys() { @@ -168,19 +183,14 @@ private static void addDeprecatedKeys() { private static void registerMRToRuntimeKeyTranslation(String mrKey, String tezKey) { - mrParamToTezRuntimeParamMap.put(mrKey, tezKey); - } - - @SuppressWarnings("unused") - private static void _(String mrKey, String tezKey) { - Configuration.addDeprecation(mrKey, tezKey); + MR_PARAM_TO_TEZ_RUNTIME_PARAM_MAP.put(mrKey, tezKey); } public static Map getMRToDAGParamMap() { - return Collections.unmodifiableMap(mrParamToDAGParamMap); + return Collections.unmodifiableMap(MR_PARAM_TO_DAG_PARAM_MAP); } public static Map getMRToTezRuntimeParamMap() { - return Collections.unmodifiableMap(mrParamToTezRuntimeParamMap); + return Collections.unmodifiableMap(MR_PARAM_TO_TEZ_RUNTIME_PARAM_MAP); } } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/IDConverter.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/IDConverter.java index 0f1b56d3d2..8656147e2c 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/IDConverter.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/IDConverter.java @@ -28,7 +28,9 @@ import org.apache.tez.dag.records.TezTaskID; import org.apache.tez.dag.records.TezVertexID; -public class IDConverter { +public final class IDConverter { + + private IDConverter() {} // FIXME hardcoded assumption that one app is one dag public static JobID toMRJobId(TezDAGID dagId) { @@ -39,7 +41,7 @@ public static JobID toMRJobId(TezDAGID dagId) { public static TaskID toMRTaskId(TezTaskID taskid) { return new TaskID( - toMRJobId(taskid.getVertexID().getDAGId()), + toMRJobId(taskid.getDAGID()), taskid.getVertexID().getId() == 0 ? TaskType.MAP : TaskType.REDUCE, taskid.getId()); } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/InputSplitInfoMem.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/InputSplitInfoMem.java index d7873fc0c8..5e1207b7b5 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/InputSplitInfoMem.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/InputSplitInfoMem.java @@ -28,7 +28,7 @@ import org.apache.tez.dag.api.TaskLocationHint; import org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; /** * Represents InputSplitInfo for splits generated to memory.

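Several hunks in this patch swap Guava's Preconditions null checks for the JDK's Objects.requireNonNull (and move the remaining Preconditions usage to org.apache.tez.common.Preconditions). Below is a minimal sketch of the resulting call-site pattern with an illustrative class name; note that checkArgument(x != null, msg) threw IllegalArgumentException, whereas requireNonNull throws NullPointerException with the supplied message.

import java.util.Objects;

// Illustrative only: mirrors the constructor pattern used in
// MapReduceSplitContainer/MapredSplitContainer after this patch.
final class NullCheckedContainer {
  private final Object inputSplit;

  NullCheckedContainer(Object inputSplit) {
    // Before: Preconditions.checkNotNull(inputSplit);
    this.inputSplit = Objects.requireNonNull(inputSplit, "Splits must be specified");
  }
}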
    diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRHelpers.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRHelpers.java index efd6463530..dd13eb205a 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRHelpers.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRHelpers.java @@ -45,10 +45,11 @@ */ @Public @Evolving -public class MRHelpers { +public final class MRHelpers { private static final Logger LOG = LoggerFactory.getLogger(MRHelpers.class); + private MRHelpers() {} /** * Translate MapReduce configuration keys to the equivalent Tez keys in the provided @@ -105,7 +106,7 @@ public static void configureMRApiUsage(Configuration conf) { } private static void convertVertexConfToTez(Configuration vertexConf, boolean preferTez) { - setStageKeysFromBaseConf(vertexConf, vertexConf, "unknown"); + setStageKeysFromBaseConf(vertexConf, vertexConf); processDirectConversion(vertexConf, preferTez); setupMRComponents(vertexConf); } @@ -136,7 +137,7 @@ private static void setupMRComponents(Configuration conf) { * require translation to tez keys. */ private static void setStageKeysFromBaseConf(Configuration conf, - Configuration baseConf, String stage) { + Configuration baseConf) { // Don't clobber explicit tez config. JobConf jobConf = null; if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS) == null) { @@ -151,7 +152,7 @@ private static void setStageKeysFromBaseConf(Configuration conf, if (LOG.isDebugEnabled()) { LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_KEY_CLASS - + " for stage: " + stage + + " for stage: unknown" + " based on job level configuration. Value: " + conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS)); } @@ -168,7 +169,7 @@ private static void setStageKeysFromBaseConf(Configuration conf, .getMapOutputValueClass().getName()); if (LOG.isDebugEnabled()) { LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_VALUE_CLASS - + " for stage: " + stage + + " for stage: unknown" + " based on job level configuration. 
Value: " + conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS)); } @@ -204,12 +205,12 @@ private static String getChildLogLevel(Configuration conf, boolean isMap) { if (isMap) { return conf.get( MRJobConfig.MAP_LOG_LEVEL, - JobConf.DEFAULT_LOG_LEVEL.toString() + MRJobConfig.DEFAULT_LOG_LEVEL ); } else { return conf.get( MRJobConfig.REDUCE_LOG_LEVEL, - JobConf.DEFAULT_LOG_LEVEL.toString() + MRJobConfig.DEFAULT_LOG_LEVEL ); } } @@ -223,7 +224,7 @@ private static void ensureNotSet(Configuration conf, String attr, String msg) private static String getLog4jCmdLineProperties(Configuration conf, boolean isMap) { - Vector logProps = new Vector(4); + Vector logProps = new Vector<>(4); TezUtils.addLog4jSystemProperties(getChildLogLevel(conf, isMap), logProps); StringBuilder sb = new StringBuilder(); for (String str : logProps) { diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java index 2f3d7ce3ec..3c47d5986d 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRInputHelpers.java @@ -19,6 +19,7 @@ package org.apache.tez.mapreduce.hadoop; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -27,14 +28,18 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import com.google.common.base.Function; -import com.google.common.base.Preconditions; +import com.google.common.base.Strings; + +import org.apache.tez.common.Preconditions; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.protobuf.ByteString; import org.apache.tez.runtime.api.InputContext; +import org.apache.tez.runtime.api.events.InputDataInformationEvent; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -42,6 +47,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -71,6 +77,7 @@ import org.apache.tez.mapreduce.input.MRInput; import org.apache.tez.mapreduce.input.MRInputLegacy; import org.apache.tez.mapreduce.protos.MRRuntimeProtos; +import org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto; @Public @Unstable @@ -81,6 +88,8 @@ public class MRInputHelpers { static final String JOB_SPLIT_RESOURCE_NAME = "job.split"; static final String JOB_SPLIT_METAINFO_RESOURCE_NAME = "job.splitmetainfo"; + protected MRInputHelpers() {} + /** * Setup split generation on the client, with splits being distributed via the traditional * MapReduce mechanism of distributing splits via the Distributed Cache. 
@@ -106,7 +115,7 @@ public class MRInputHelpers { public static DataSourceDescriptor configureMRInputWithLegacySplitGeneration(Configuration conf, Path splitsDir, boolean useLegacyInput) { - InputSplitInfo inputSplitInfo = null; + InputSplitInfo inputSplitInfo; try { inputSplitInfo = generateInputSplits(conf, splitsDir); @@ -116,17 +125,11 @@ public static DataSourceDescriptor configureMRInputWithLegacySplitGeneration(Con Map additionalLocalResources = new HashMap(); updateLocalResourcesForInputSplits(conf, inputSplitInfo, additionalLocalResources); - DataSourceDescriptor dsd = - DataSourceDescriptor.create(inputDescriptor, null, inputSplitInfo.getNumTasks(), - inputSplitInfo.getCredentials(), - VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()), - additionalLocalResources); - return dsd; - } catch (IOException e) { - throw new TezUncheckedException("Failed to generate InputSplits", e); - } catch (InterruptedException e) { - throw new TezUncheckedException("Failed to generate InputSplits", e); - } catch (ClassNotFoundException e) { + return DataSourceDescriptor.create(inputDescriptor, null, inputSplitInfo.getNumTasks(), + inputSplitInfo.getCredentials(), + VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()), + additionalLocalResources); + } catch (IOException | InterruptedException | ClassNotFoundException e) { throw new TezUncheckedException("Failed to generate InputSplits", e); } } @@ -138,7 +141,6 @@ public static DataSourceDescriptor configureMRInputWithLegacySplitGeneration(Con * @param payload the {@link org.apache.tez.dag.api.UserPayload} instance * @return an instance of {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto}, * which provides access to the underlying configuration bytes - * @throws IOException */ @InterfaceStability.Evolving @InterfaceAudience.LimitedPrivate({"hive, pig"}) @@ -155,7 +157,6 @@ public static MRRuntimeProtos.MRInputUserPayloadProto parseMRInputPayload(UserPa * instance representing the split * @param serializationFactory the serialization mechanism used to write out the split * @return an instance of the split - * @throws java.io.IOException */ @SuppressWarnings("unchecked") @InterfaceStability.Evolving @@ -165,7 +166,7 @@ public static InputSplit createOldFormatSplitFromUserPayload( throws IOException { // This may not need to use serialization factory, since OldFormat // always uses Writable to write splits. 
- Preconditions.checkNotNull(splitProto, "splitProto cannot be null"); + Objects.requireNonNull(splitProto, "splitProto cannot be null"); String className = splitProto.getSplitClassName(); Class clazz; @@ -191,14 +192,13 @@ public static InputSplit createOldFormatSplitFromUserPayload( * instance representing the split * @param serializationFactory the serialization mechanism used to write out the split * @return an instance of the split - * @throws IOException */ @InterfaceStability.Evolving @SuppressWarnings("unchecked") public static org.apache.hadoop.mapreduce.InputSplit createNewFormatSplitFromUserPayload( MRRuntimeProtos.MRSplitProto splitProto, SerializationFactory serializationFactory) throws IOException { - Preconditions.checkNotNull(splitProto, "splitProto must be specified"); + Objects.requireNonNull(splitProto, "splitProto must be specified"); String className = splitProto.getSplitClassName(); Class clazz; @@ -221,7 +221,7 @@ public static org.apache.hadoop.mapreduce.InputSplit createNewFormatSplitFromUse @InterfaceStability.Evolving public static MRRuntimeProtos.MRSplitProto createSplitProto( T newSplit, SerializationFactory serializationFactory) - throws IOException, InterruptedException { + throws IOException { MRRuntimeProtos.MRSplitProto.Builder builder = MRRuntimeProtos.MRSplitProto .newBuilder(); @@ -277,9 +277,6 @@ public static MRRuntimeProtos.MRSplitProto createSplitProto( * @param targetTasks the number of target tasks if grouping is enabled. Specify as 0 otherwise. * @return an instance of {@link InputSplitInfoMem} which supports a subset of * the APIs defined on {@link InputSplitInfo} - * @throws IOException - * @throws ClassNotFoundException - * @throws InterruptedException */ @InterfaceStability.Unstable @InterfaceAudience.LimitedPrivate({"hive, pig"}) @@ -309,30 +306,23 @@ public static InputSplitInfoMem generateInputSplitsToMem(Configuration conf, * @param targetTasks the number of target tasks if grouping is enabled. Specify as 0 otherwise. 
* @return an instance of {@link InputSplitInfoMem} which supports a subset of * the APIs defined on {@link InputSplitInfo} - * @throws IOException - * @throws ClassNotFoundException - * @throws InterruptedException */ @InterfaceStability.Unstable public static InputSplitInfoMem generateInputSplitsToMem(Configuration conf, boolean groupSplits, boolean sortSplits, int targetTasks) throws IOException, ClassNotFoundException, InterruptedException { - InputSplitInfoMem splitInfoMem = null; + InputSplitInfoMem splitInfoMem; JobConf jobConf = new JobConf(conf); if (jobConf.getUseNewMapper()) { - if (LOG.isDebugEnabled()) { - LOG.debug("Generating mapreduce api input splits"); - } + LOG.debug("Generating mapreduce api input splits"); Job job = Job.getInstance(conf); org.apache.hadoop.mapreduce.InputSplit[] splits = generateNewSplits(job, groupSplits, sortSplits, targetTasks); splitInfoMem = new InputSplitInfoMem(splits, createTaskLocationHintsFromSplits(splits), splits.length, job.getCredentials(), job.getConfiguration()); } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Generating mapred api input splits"); - } + LOG.debug("Generating mapred api input splits"); org.apache.hadoop.mapred.InputSplit[] splits = generateOldSplits(jobConf, groupSplits, sortSplits, targetTasks); splitInfoMem = new InputSplitInfoMem(splits, createTaskLocationHintsFromSplits(splits), @@ -359,7 +349,7 @@ public TaskLocationHint apply( if (rack == null) { if (input.getLocations() != null) { return TaskLocationHint.createTaskLocationHint( - new HashSet(Arrays.asList(input.getLocations())), null); + new HashSet<>(Arrays.asList(input.getLocations())), null); } else { return TaskLocationHint.createTaskLocationHint(null, null); } @@ -369,7 +359,7 @@ public TaskLocationHint apply( } } else { return TaskLocationHint.createTaskLocationHint( - new HashSet(Arrays.asList(input.getLocations())), null); + new HashSet<>(Arrays.asList(input.getLocations())), null); } } catch (IOException e) { throw new RuntimeException(e); @@ -402,7 +392,7 @@ public TaskLocationHint apply(org.apache.hadoop.mapred.InputSplit input) { } } else { return TaskLocationHint.createTaskLocationHint( - new HashSet(Arrays.asList(input.getLocations())), + new HashSet<>(Arrays.asList(input.getLocations())), null); } } catch (IOException e) { @@ -416,20 +406,20 @@ public TaskLocationHint apply(org.apache.hadoop.mapred.InputSplit input) { @SuppressWarnings({ "rawtypes", "unchecked" }) private static org.apache.hadoop.mapreduce.InputSplit[] generateNewSplits( JobContext jobContext, boolean groupSplits, boolean sortSplits, - int numTasks) throws ClassNotFoundException, IOException, + int numTasks) throws IOException, InterruptedException { Configuration conf = jobContext.getConfiguration(); // This is the real input format. - org.apache.hadoop.mapreduce.InputFormat inputFormat = null; + org.apache.hadoop.mapreduce.InputFormat inputFormat; try { inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), conf); } catch (ClassNotFoundException e) { throw new TezUncheckedException(e); } - org.apache.hadoop.mapreduce.InputFormat finalInputFormat = inputFormat; + org.apache.hadoop.mapreduce.InputFormat finalInputFormat; // For grouping, the underlying InputFormatClass class is passed in as a parameter. 
// JobContext has this setup as TezGroupedSplitInputFormat @@ -446,7 +436,7 @@ private static org.apache.hadoop.mapreduce.InputSplit[] generateNewSplits( List array = finalInputFormat .getSplits(jobContext); - org.apache.hadoop.mapreduce.InputSplit[] splits = (org.apache.hadoop.mapreduce.InputSplit[]) array + org.apache.hadoop.mapreduce.InputSplit[] splits = array .toArray(new org.apache.hadoop.mapreduce.InputSplit[array.size()]); if (sortSplits) { @@ -472,7 +462,7 @@ private static org.apache.hadoop.mapred.InputSplit[] generateOldSplits( throw new TezUncheckedException(e); } - org.apache.hadoop.mapred.InputFormat finalInputFormat = inputFormat; + org.apache.hadoop.mapred.InputFormat finalInputFormat; if (groupSplits) { org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat groupedFormat = @@ -505,16 +495,8 @@ public int compare(org.apache.hadoop.mapreduce.InputSplit o1, try { long len1 = o1.getLength(); long len2 = o2.getLength(); - if (len1 < len2) { - return 1; - } else if (len1 == len2) { - return 0; - } else { - return -1; - } - } catch (IOException ie) { - throw new RuntimeException("exception in InputSplit compare", ie); - } catch (InterruptedException ie) { + return Long.compare(len2, len1); + } catch (IOException | InterruptedException ie) { throw new RuntimeException("exception in InputSplit compare", ie); } } @@ -531,13 +513,7 @@ public int compare(org.apache.hadoop.mapred.InputSplit o1, try { long len1 = o1.getLength(); long len2 = o2.getLength(); - if (len1 < len2) { - return 1; - } else if (len1 == len2) { - return 0; - } else { - return -1; - } + return Long.compare(len2, len1); } catch (IOException ie) { throw new RuntimeException("Problem getting input split size", ie); } @@ -552,10 +528,6 @@ public int compare(org.apache.hadoop.mapred.InputSplit o1, * @return InputSplitInfo containing the split files' information and the * location hints for each split generated to be used to determining parallelism of * the map stage. - * - * @throws IOException - * @throws InterruptedException - * @throws ClassNotFoundException */ private static InputSplitInfoDisk writeNewSplits(JobContext jobContext, Path inputSplitDir) throws IOException, InterruptedException, @@ -571,10 +543,10 @@ private static InputSplitInfoDisk writeNewSplits(JobContext jobContext, List locationHints = new ArrayList(splits.length); - for (int i = 0; i < splits.length; ++i) { + for (org.apache.hadoop.mapreduce.InputSplit split : splits) { locationHints.add( - TaskLocationHint.createTaskLocationHint(new HashSet( - Arrays.asList(splits[i].getLocations())), null) + TaskLocationHint.createTaskLocationHint(new HashSet( + Arrays.asList(split.getLocations())), null) ); } @@ -592,8 +564,6 @@ private static InputSplitInfoDisk writeNewSplits(JobContext jobContext, * @return InputSplitInfo containing the split files' information and the * number of splits generated to be used to determining parallelism of * the map stage. 
- * - * @throws IOException */ private static InputSplitInfoDisk writeOldSplits(JobConf jobConf, Path inputSplitDir) throws IOException { @@ -605,11 +575,11 @@ private static InputSplitInfoDisk writeOldSplits(JobConf jobConf, inputSplitDir.getFileSystem(jobConf), splits); List locationHints = - new ArrayList(splits.length); - for (int i = 0; i < splits.length; ++i) { + new ArrayList<>(splits.length); + for (InputSplit split : splits) { locationHints.add( - TaskLocationHint.createTaskLocationHint(new HashSet( - Arrays.asList(splits[i].getLocations())), null) + TaskLocationHint.createTaskLocationHint(new HashSet<>( + Arrays.asList(split.getLocations())), null) ); } @@ -640,10 +610,6 @@ private static InputSplitInfoDisk writeOldSplits(JobConf jobConf, * @return InputSplitInfo containing the split files' information and the * number of splits generated to be used to determining parallelism of * the map stage. - * - * @throws IOException - * @throws InterruptedException - * @throws ClassNotFoundException */ private static InputSplitInfoDisk generateInputSplits(Configuration conf, Path inputSplitsDir) throws IOException, InterruptedException, @@ -669,7 +635,6 @@ private static InputSplitInfoDisk generateInputSplits(Configuration conf, * @param conf Configuration * @param inputSplitInfo Information on location of split files * @param localResources LocalResources collection to be updated - * @throws IOException */ private static void updateLocalResourcesForInputSplits( Configuration conf, @@ -754,8 +719,8 @@ protected static UserPayload createMRInputPayload(Configuration conf, } private static UserPayload createMRInputPayload(ByteString bytes, - MRRuntimeProtos.MRSplitsProto mrSplitsProto, - boolean isGrouped, boolean isSorted) throws IOException { + MRRuntimeProtos.MRSplitsProto mrSplitsProto, + boolean isGrouped, boolean isSorted) { MRRuntimeProtos.MRInputUserPayloadProto.Builder userPayloadBuilder = MRRuntimeProtos.MRInputUserPayloadProto .newBuilder(); @@ -771,8 +736,8 @@ private static UserPayload createMRInputPayload(ByteString bytes, private static String getStringProperty(Configuration conf, String propertyName) { - Preconditions.checkNotNull(conf, "Configuration must be provided"); - Preconditions.checkNotNull(propertyName, "Property name must be provided"); + Objects.requireNonNull(conf, "Configuration must be provided"); + Objects.requireNonNull(propertyName, "Property name must be provided"); return conf.get(propertyName); } @@ -781,7 +746,7 @@ private static int getIntProperty(Configuration conf, String propertyName) { } /** - * @see {@link InputContext#getDagIdentifier} + * @see InputContext#getDagIdentifier() * @param conf configuration instance * @return dag index */ @@ -801,7 +766,7 @@ public static String getDagIdString(Configuration conf) { } /** - * * @see {@link InputContext#getTaskVertexIndex} + * @see InputContext#getTaskVertexIndex * @param conf configuration instance * @return vertex index */ @@ -821,7 +786,7 @@ public static String getVertexIdString(Configuration conf) { } /** - * @see {@link InputContext#getTaskIndex} + * @see InputContext#getTaskIndex * @param conf configuration instance * @return task index */ @@ -841,7 +806,7 @@ public static String getTaskIdString(Configuration conf) { } /** - * @see {@link InputContext#getTaskAttemptNumber} + * @see InputContext#getTaskAttemptNumber * @param conf configuration instance * @return task attempt index */ @@ -861,7 +826,7 @@ public static String getTaskAttemptIdString(Configuration conf) { } /** - * @see {@link 
InputContext#getInputIndex} + * @see InputContext#getInputIndex * @param conf configuration instance * @return input index */ @@ -871,7 +836,7 @@ public static int getInputIndex(Configuration conf) { } /** - * @see {@link InputContext#getDAGName} + * @see InputContext#getDAGName * @param conf configuration instance * @return dag name */ @@ -881,7 +846,7 @@ public static String getDagName(Configuration conf) { } /** - * @see {@link InputContext#getTaskVertexName} + * @see InputContext#getTaskVertexName * @param conf configuration instance * @return vertex name */ @@ -891,7 +856,7 @@ public static String getVertexName(Configuration conf) { } /** - * @see {@link InputContext#getSourceVertexName} + * @see InputContext#getSourceVertexName * @param conf configuration instance * @return source name */ @@ -901,7 +866,7 @@ public static String getInputName(Configuration conf) { } /** - * @see {@link InputContext#getApplicationId} + * @see InputContext#getApplicationId * @param conf configuration instance * @return applicationId as a string */ @@ -911,7 +876,7 @@ public static String getApplicationIdString(Configuration conf) { } /** - * @see {@link InputContext#getUniqueIdentifier} + * @see InputContext#getUniqueIdentifier * @param conf configuration instance * @return unique identifier for the input */ @@ -921,7 +886,7 @@ public static String getUniqueIdentifier(Configuration conf) { } /** - * @see {@link InputContext#getDAGAttemptNumber} + * @see InputContext#getDAGAttemptNumber * @param conf configuration instance * @return attempt number */ @@ -930,4 +895,29 @@ public static int getDagAttemptNumber(Configuration conf) { return getIntProperty(conf, MRInput.TEZ_MAPREDUCE_DAG_ATTEMPT_NUMBER); } + public static MRSplitProto getProto(InputDataInformationEvent initEvent, JobConf jobConf) throws IOException { + return Strings.isNullOrEmpty(initEvent.getSerializedPath()) ? 
readProtoFromPayload(initEvent) + : readProtoFromFs(initEvent, jobConf); + } + + private static MRSplitProto readProtoFromFs(InputDataInformationEvent initEvent, JobConf jobConf) throws IOException { + String serializedPath = initEvent.getSerializedPath(); + Path filePath = new Path(serializedPath); + LOG.info("Reading InputDataInformationEvent from path: {}", filePath); + + MRSplitProto splitProto = null; + FileSystem fs = filePath.getFileSystem(jobConf); + + try (FSDataInputStream in = fs.open(filePath)) { + splitProto = MRSplitProto.parseFrom(in); + fs.delete(filePath, false); + } + return splitProto; + } + + private static MRSplitProto readProtoFromPayload(InputDataInformationEvent initEvent) throws IOException { + ByteBuffer payload = initEvent.getUserPayload(); + LOG.info("Reading InputDataInformationEvent from payload: {}", payload); + return MRSplitProto.parseFrom(ByteString.copyFrom(payload)); + } } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRJobConfig.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRJobConfig.java index 02c74b278d..f1183742fc 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRJobConfig.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MRJobConfig.java @@ -85,6 +85,15 @@ public interface MRJobConfig { public static final String SKIP_OUTDIR = "mapreduce.job.skip.outdir"; + public static final String SPECULATIVECAP_RUNNING_TASKS = "mapreduce.job.speculative.speculative-cap-running-tasks"; + + public static final String RETRY_AFTER_NO_SPECULATE = "mapreduce.job.speculative.retry-after-no-speculate"; + + public static final String RETRY_AFTER_SPECULATE = "mapreduce.job.speculative.retry-after-speculate"; + + public static final String MINIMUM_ALLOWED_TASKS = "mapreduce.job.speculative.minimum-allowed-tasks"; + + public static final String SPECULATIVECAP_TOTAL_TASKS = "mapreduce.job.speculative.speculative-cap-total-tasks"; public static final String JOB_LOCAL_DIR = "mapreduce.job.local.dir"; @@ -122,6 +131,13 @@ public interface MRJobConfig { public static final String CACHE_ARCHIVES_VISIBILITIES = "mapreduce.job.cache.archives.visibilities"; + /** + * Used by committers to set a job-wide UUID. + */ + public static final String JOB_COMMITTER_UUID = "job.committer.uuid"; + + public static final String FILEOUTPUTCOMMITTER_ALGORITHM_VERSION = "mapreduce.fileoutputcommitter.algorithm.version"; + /** * @deprecated Symlinks are always on and cannot be disabled. 
*/ @@ -291,6 +307,8 @@ public interface MRJobConfig { public static final String JOB_NAMENODES = "mapreduce.job.hdfs-servers"; + public static final String JOB_NAMENODES_TOKEN_RENEWAL_EXCLUDE = "mapreduce.job.hdfs-servers.token-renewal.exclude"; + public static final String JOB_JOBTRACKER_ID = "mapreduce.job.kerberos.jtprinicipal"; public static final String JOB_CANCEL_DELEGATION_TOKEN = "mapreduce.job.complete.cancel.delegation.tokens"; @@ -632,6 +650,7 @@ public interface MRJobConfig { "mrr.vertex."; public static final String VERTEX_NAME = "mapreduce.task.vertex.name"; + public static final String VERTEX_ID = "mapreduce.task.vertex.id"; public static final String MR_TEZ_SPLITS_VIA_EVENTS = MR_TEZ_PREFIX + "splits.via.events"; public static final boolean MR_TEZ_SPLITS_VIA_EVENTS_DEFAULT = true; diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MultiStageMRConfToTezTranslator.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MultiStageMRConfToTezTranslator.java index 3f5ad230cf..de38766cd3 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MultiStageMRConfToTezTranslator.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MultiStageMRConfToTezTranslator.java @@ -25,13 +25,14 @@ import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; -public class MultiStageMRConfToTezTranslator { +public final class MultiStageMRConfToTezTranslator { + + private MultiStageMRConfToTezTranslator() {} /** * Given a single base MRR config, returns a list of complete stage * configurations. - * - * @param conf + * * @return list of complete stage configurations for the given Configuration */ @Private @@ -62,14 +63,13 @@ public static Configuration[] getStageConfs(Configuration conf) { } } - Configuration confs[] = new Configuration[numStages]; + Configuration[] confs = new Configuration[numStages]; Configuration nonItermediateConf = MultiStageMRConfigUtil.extractStageConf( conf, ""); + confs[0] = nonItermediateConf; if (numStages == 1) { - confs[0] = nonItermediateConf; confs[0].setBoolean(MRConfig.IS_MAP_PROCESSOR, true); } else { - confs[0] = nonItermediateConf; confs[numStages - 1] = new Configuration(nonItermediateConf); confs[numStages -1].setBoolean(MRConfig.IS_MAP_PROCESSOR, false); } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MultiStageMRConfigUtil.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MultiStageMRConfigUtil.java index 13e0b860eb..23ffd3ef75 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MultiStageMRConfigUtil.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/MultiStageMRConfigUtil.java @@ -18,14 +18,15 @@ package org.apache.tez.mapreduce.hadoop; -import java.util.Iterator; import java.util.Map.Entry; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; @Private -public class MultiStageMRConfigUtil { +public final class MultiStageMRConfigUtil { + + private MultiStageMRConfigUtil() {} ////////////////////////////////////////////////////////////////////////////// // Methods based on Stage Num // @@ -73,9 +74,7 @@ static Configuration extractStageConf(Configuration baseConf, String prefix) { Configuration strippedConf = new Configuration(false); Configuration conf = new Configuration(false); - Iterator> confEntries = baseConf.iterator(); - while (confEntries.hasNext()) { - Entry entry = confEntries.next(); + for (Entry entry : baseConf) { String key =
entry.getKey(); if (key.startsWith(prefix)) { // Ignore keys for other intermediate stages in case of an initial or final stage. @@ -95,9 +94,7 @@ static Configuration extractStageConf(Configuration baseConf, } // Replace values from strippedConf into the finalConf. Override values // which may have been copied over from the baseConf root level. - Iterator> entries = strippedConf.iterator(); - while (entries.hasNext()) { - Entry entry = entries.next(); + for (Entry entry : strippedConf) { if (!Configuration.isDeprecated(entry.getKey())) { conf.set(entry.getKey(), entry.getValue()); } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/TezTypeConverters.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/TezTypeConverters.java index 6f9c1c7600..0efde4ac70 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/TezTypeConverters.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/TezTypeConverters.java @@ -29,7 +29,9 @@ import org.apache.tez.dag.records.TezDAGID; import org.apache.tez.dag.records.TezTaskAttemptID; -public class TezTypeConverters { +public final class TezTypeConverters { + + private TezTypeConverters() {} // TODO Remove unused methods @@ -46,15 +48,13 @@ public static org.apache.hadoop.mapreduce.JobID toJobID(TezDAGID id) { public static TaskAttemptId toYarn(TezTaskAttemptID taskAttemptId) { TaskAttemptID mrTaskAttemptId = IDConverter .toMRTaskAttemptId(taskAttemptId); - TaskAttemptId mrv2TaskAttemptId = TypeConverter.toYarn(mrTaskAttemptId); - return mrv2TaskAttemptId; + return TypeConverter.toYarn(mrTaskAttemptId); } public static TezTaskAttemptID toTez(TaskAttemptId taskAttemptId) { TaskAttemptID mrTaskAttemptId = TypeConverter.fromYarn(taskAttemptId); - TezTaskAttemptID tezTaskAttemptId = IDConverter + return IDConverter .fromMRTaskAttemptId(mrTaskAttemptId); - return tezTaskAttemptId; } public static Counters fromTez(TezCounters tezCounters) { diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapred/MRCounters.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapred/MRCounters.java index cc9b6baf92..b6b8e150ea 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapred/MRCounters.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapred/MRCounters.java @@ -26,7 +26,7 @@ import java.util.Collection; import java.util.Iterator; -import org.apache.commons.collections.IteratorUtils; +import org.apache.commons.collections4.IteratorUtils; public class MRCounters extends org.apache.hadoop.mapred.Counters { private final org.apache.tez.common.counters.TezCounters raw; diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapreduce/TaskAttemptContextImpl.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapreduce/TaskAttemptContextImpl.java index e5e7022064..941c8732bd 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapreduce/TaskAttemptContextImpl.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/hadoop/mapreduce/TaskAttemptContextImpl.java @@ -53,8 +53,8 @@ public static org.apache.hadoop.mapred.TaskAttemptID createMockTaskAttemptID( public static org.apache.hadoop.mapred.TaskAttemptID createMockTaskAttemptIDFromTezTaskAttemptId(TezTaskAttemptID tezTaId, boolean isMap) { - TezVertexID vId = tezTaId.getTaskID().getVertexID(); - ApplicationId appId = vId.getDAGId().getApplicationId(); + TezVertexID vId = tezTaId.getVertexID(); + ApplicationId appId = 
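Both iterator rewrites above lean on the fact that Hadoop's Configuration implements Iterable<Map.Entry<String, String>>, which is what lets the explicit Iterator boilerplate collapse into a for-each. The same prefix-extraction idea in a standalone, hypothetical sketch:

    import java.util.Map.Entry;
    import org.apache.hadoop.conf.Configuration;

    public final class StagePrefixSketch {
      // Copy every key that starts with prefix into a fresh Configuration,
      // dropping the prefix ("stage1.foo" becomes "foo" for prefix "stage1.").
      static Configuration extract(Configuration base, String prefix) {
        Configuration out = new Configuration(false);
        for (Entry<String, String> entry : base) {
          if (entry.getKey().startsWith(prefix)) {
            out.set(entry.getKey().substring(prefix.length()), entry.getValue());
          }
        }
        return out;
      }
    }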
vId.getApplicationId(); return new org.apache.hadoop.mapred.TaskAttemptID( new org.apache.hadoop.mapred.TaskID(String.valueOf(appId.getClusterTimestamp()) + String.valueOf(vId.getId()), appId.getId(), @@ -65,7 +65,7 @@ public static org.apache.hadoop.mapred.TaskAttemptID createMockTaskAttemptID( public static org.apache.hadoop.mapred.TaskID createMockTaskAttemptIDFromTezTaskId(TezTaskID tezTaId, boolean isMap) { TezVertexID vId = tezTaId.getVertexID(); - ApplicationId appId = vId.getDAGId().getApplicationId(); + ApplicationId appId = vId.getApplicationId(); return new org.apache.hadoop.mapred.TaskID(String.valueOf(appId.getClusterTimestamp()) + String.valueOf(vId.getId()), appId.getId(), isMap ? TaskType.MAP : TaskType.REDUCE, tezTaId.getId()); diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInput.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInput.java index 248a92ac4b..8c3d5d5dbb 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInput.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInput.java @@ -23,11 +23,10 @@ import java.net.URI; import java.util.Collection; import java.util.List; +import java.util.Objects; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; -import com.google.protobuf.ByteString; - import org.apache.tez.runtime.api.ProgressFailedException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -70,7 +69,8 @@ import org.apache.tez.runtime.library.api.KeyValueReader; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; + import com.google.common.collect.Lists; /** @@ -175,7 +175,7 @@ private MRInputConfigBuilder setInputPaths(String inputPaths) { } private void initializeInputPath() { - Preconditions.checkState(inputFormatProvided == false, + Preconditions.checkState(!inputFormatProvided, "Should only be invoked when no inputFormat is provided"); if (org.apache.hadoop.mapred.FileInputFormat.class.isAssignableFrom(inputFormat) || FileInputFormat.class.isAssignableFrom(inputFormat)) { @@ -436,8 +436,6 @@ public static MRInputConfigBuilder createConfigBuilder(Configuration conf, private final ReentrantLock rrLock = new ReentrantLock(); private final Condition rrInited = rrLock.newCondition(); - - private volatile boolean eventReceived = false; private boolean readerCreated = false; @@ -460,7 +458,7 @@ public List initialize() throws IOException { getContext().inputIsReady(); this.splitInfoViaEvents = jobConf.getBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS_DEFAULT); - LOG.info(getContext().getSourceVertexName() + " using newmapreduce API=" + useNewApi + + LOG.info(getContext().getInputOutputVertexNames() + " using newmapreduce API=" + useNewApi + ", split via event=" + splitInfoViaEvents + ", numPhysicalInputs=" + getNumPhysicalInputs()); initializeInternal(); @@ -490,8 +488,8 @@ void initializeInternal() throws IOException { getContext()); } } else { - TaskSplitMetaInfo[] allMetaInfo = MRInputUtils.readSplits(jobConf); - TaskSplitMetaInfo thisTaskMetaInfo = allMetaInfo[getContext().getTaskIndex()]; + TaskSplitMetaInfo thisTaskMetaInfo = MRInputUtils.getSplits(jobConf, + getContext().getTaskIndex()); TaskSplitIndex splitMetaInfo = new TaskSplitIndex(thisTaskMetaInfo.getSplitLocation(), thisTaskMetaInfo.getStartOffset()); long splitLength = -1; @@ -525,7 +523,7 @@ inputRecordCounter, 
getContext().getApplicationId().getClusterTimestamp(), } finally { rrLock.unlock(); } - LOG.info("Initialized MRInput: " + getContext().getSourceVertexName()); + LOG.info("Initialized MRInput: " + getContext().getInputOutputVertexNames()); } /** @@ -536,24 +534,24 @@ inputRecordCounter, getContext().getApplicationId().getClusterTimestamp(), @Override public KeyValueReader getReader() throws IOException { Preconditions - .checkState(readerCreated == false, + .checkState(!readerCreated, "Only a single instance of record reader can be created for this input."); readerCreated = true; if (getNumPhysicalInputs() == 0) { return new KeyValueReader() { @Override - public boolean next() throws IOException { + public boolean next() { getContext().notifyProgress(); return false; } @Override - public Object getCurrentKey() throws IOException { + public Object getCurrentKey() { return null; } @Override - public Object getCurrentValue() throws IOException { + public Object getCurrentValue() { return null; } }; @@ -575,11 +573,11 @@ public void handleEvents(List inputEvents) throws Exception { throw new IllegalStateException( "Unexpected event. MRInput has been setup to receive 0 events"); } - if (eventReceived || inputEvents.size() != 1) { + + if (inputEvents.size() != 1) { throw new IllegalStateException( "MRInput expects only a single input. Received: current eventListSize: " - + inputEvents.size() + "Received previous input: " - + eventReceived); + + inputEvents.size() + "Received previous input: false"); } Event event = inputEvents.iterator().next(); Preconditions.checkArgument(event instanceof InputDataInformationEvent, @@ -592,7 +590,10 @@ public void handleEvents(List inputEvents) throws Exception { @Override public List close() throws IOException { - mrReader.close(); + if (mrReader != null) { + mrReader.close(); + mrReader = null; + } long inputRecords = getContext().getCounters() .findCounter(TaskCounter.INPUT_RECORDS_PROCESSED).getValue(); getContext().getStatisticsReporter().reportItemsProcessed(inputRecords); @@ -630,7 +631,7 @@ void processSplitEvent(InputDataInformationEvent event) try { initFromEventInternal(event); if (LOG.isDebugEnabled()) { - LOG.debug(getContext().getSourceVertexName() + " notifying on RecordReader initialized"); + LOG.debug(getContext().getInputOutputVertexNames() + " notifying on RecordReader initialized"); } rrInited.signal(); } finally { @@ -643,7 +644,7 @@ void checkAndAwaitRecordReaderInitialization() throws IOException { rrLock.lock(); try { if (LOG.isDebugEnabled()) { - LOG.debug(getContext().getSourceVertexName() + " awaiting RecordReader initialization"); + LOG.debug(getContext().getInputOutputVertexNames() + " awaiting RecordReader initialization"); } rrInited.await(); } catch (Exception e) { @@ -667,10 +668,10 @@ void initFromEvent(InputDataInformationEvent initEvent) private void initFromEventInternal(InputDataInformationEvent initEvent) throws IOException { if (LOG.isDebugEnabled()) { - LOG.debug(getContext().getSourceVertexName() + " initializing RecordReader from event"); + LOG.debug(getContext().getInputOutputVertexNames() + " initializing RecordReader from event"); } - Preconditions.checkState(initEvent != null, "InitEvent must be specified"); - MRSplitProto splitProto = MRSplitProto.parseFrom(ByteString.copyFrom(initEvent.getUserPayload())); + Objects.requireNonNull(initEvent, "InitEvent must be specified"); + MRSplitProto splitProto = MRInputHelpers.getProto(initEvent, jobConf); Object splitObj = null; long splitLength = -1; if (useNewApi) { @@ 
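MRInputHelpers.getProto, called in initFromEventInternal above, replaces the direct MRSplitProto.parseFrom of the payload so that events carrying only a serialized path also work. Judging from the two helper bodies at the top of this patch, the dispatch reduces to the following; the exact null check is an assumption, since only the ternary's branches are visible here:

    // Sketch: payload-backed events are parsed in place; path-backed events are
    // read from the FileSystem (and the temporary split file is deleted after).
    static MRSplitProto getProto(InputDataInformationEvent initEvent, JobConf jobConf)
        throws IOException {
      return initEvent.getSerializedPath() == null
          ? readProtoFromPayload(initEvent)
          : readProtoFromFs(initEvent, jobConf);
    }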
-682,7 +683,7 @@ private void initFromEventInternal(InputDataInformationEvent initEvent) throws I LOG.warn("Thread interrupted while getting split length: ", e); } if (LOG.isDebugEnabled()) { - LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + + LOG.debug(getContext().getInputOutputVertexNames() + " split Details -> SplitClass: " + split.getClass().getName() + ", NewSplit: " + split + ", length: " + splitLength); } @@ -692,7 +693,7 @@ private void initFromEventInternal(InputDataInformationEvent initEvent) throws I splitObj = split; splitLength = split.getLength(); if (LOG.isDebugEnabled()) { - LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + + LOG.debug(getContext().getInputOutputVertexNames() + " split Details -> SplitClass: " + split.getClass().getName() + ", OldSplit: " + split + ", length: " + splitLength); } } @@ -701,7 +702,7 @@ private void initFromEventInternal(InputDataInformationEvent initEvent) throws I .increment(splitLength); } mrReader.setSplit(splitObj); - LOG.info(getContext().getSourceVertexName() + " initialized RecordReader from event"); + LOG.info(getContext().getInputOutputVertexNames() + " initialized RecordReader from event"); } private static class MRInputHelpersInternal extends MRInputHelpers { diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInputLegacy.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInputLegacy.java index 70be7ee444..bbf145eb6c 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInputLegacy.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MRInputLegacy.java @@ -74,7 +74,7 @@ public MRInputLegacy(InputContext inputContext, int numPhysicalInputs) { @Private protected void initializeInternal() throws IOException { - LOG.info(getContext().getSourceVertexName() + " MRInputLegacy deferring initialization"); + LOG.info(getContext().getInputOutputVertexNames() + " MRInputLegacy deferring initialization"); } @Private @@ -136,7 +136,7 @@ void checkAndAwaitRecordReaderInitialization() throws IOException { if (splitInfoViaEvents && !inited) { if (initEvent == null) { if (LOG.isDebugEnabled()) { - LOG.debug(getContext().getSourceVertexName() + + LOG.debug(getContext().getInputOutputVertexNames() + " awaiting init event before initializing record reader"); } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MultiMRInput.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MultiMRInput.java index de54b0d1f4..4a98052211 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MultiMRInput.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/MultiMRInput.java @@ -25,12 +25,13 @@ import java.util.Collections; import java.util.LinkedList; import java.util.List; +import java.util.Objects; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; import com.google.common.base.Function; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import com.google.protobuf.ByteString; @@ -109,7 +110,7 @@ private MultiMRInputConfigBuilder(Configuration conf, Class inputFormat) { @Override public List initialize() throws IOException { super.initialize(); - LOG.info(getContext().getSourceVertexName() + " using newmapreduce API=" + useNewApi + + LOG.info(getContext().getInputOutputVertexNames() + " 
using newmapreduce API=" + useNewApi + ", numPhysicalInputs=" + getNumPhysicalInputs()); if (getNumPhysicalInputs() == 0) { getContext().inputIsReady(); @@ -164,9 +165,9 @@ public void handleEvents(List inputEvents) throws Exception { } private MRReader initFromEvent(InputDataInformationEvent event) throws IOException { - Preconditions.checkState(event != null, "Event must be specified"); + Objects.requireNonNull(event, "Event must be specified"); if (LOG.isDebugEnabled()) { - LOG.debug(getContext().getSourceVertexName() + " initializing Reader: " + eventCount.get()); + LOG.debug(getContext().getInputOutputVertexNames() + " initializing Reader: " + eventCount.get()); } MRSplitProto splitProto = MRSplitProto.parseFrom(ByteString.copyFrom(event.getUserPayload())); MRReader reader = null; @@ -185,7 +186,7 @@ private MRReader initFromEvent(InputDataInformationEvent event) throws IOExcepti .getApplicationId().getId(), getContext().getTaskIndex(), getContext() .getTaskAttemptNumber(), getContext()); if (LOG.isDebugEnabled()) { - LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + + LOG.debug(getContext().getInputOutputVertexNames() + " split Details -> SplitClass: " + split.getClass().getName() + ", NewSplit: " + split + ", length: " + splitLength); } } else { @@ -195,7 +196,7 @@ private MRReader initFromEvent(InputDataInformationEvent event) throws IOExcepti reader = new MRReaderMapred(localJobConf, split, getContext().getCounters(), inputRecordCounter, getContext()); if (LOG.isDebugEnabled()) { - LOG.debug(getContext().getSourceVertexName() + " split Details -> SplitClass: " + + LOG.debug(getContext().getInputOutputVertexNames() + " split Details -> SplitClass: " + split.getClass().getName() + ", OldSplit: " + split + ", length: " + splitLength); } } @@ -203,7 +204,7 @@ private MRReader initFromEvent(InputDataInformationEvent event) throws IOExcepti getContext().getCounters().findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES) .increment(splitLength); } - LOG.info(getContext().getSourceVertexName() + " initialized RecordReader from event"); + LOG.info(getContext().getInputOutputVertexNames() + " initialized RecordReader from event"); return reader; } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/base/MRInputBase.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/base/MRInputBase.java index 9a26c2b7c5..ccae0b1964 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/base/MRInputBase.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/input/base/MRInputBase.java @@ -18,7 +18,7 @@ package org.apache.tez.mapreduce.input.base; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -72,8 +72,9 @@ public List initialize() throws IOException { boolean isGrouped = mrUserPayload.getGroupingEnabled(); Preconditions.checkArgument(mrUserPayload.hasSplits() == false, "Split information not expected in " + this.getClass().getName()); - Configuration conf = TezUtils - .createConfFromByteString(mrUserPayload.getConfigurationBytes()); + + Configuration conf = new JobConf(getContext().getContainerConfiguration()); + TezUtils.addToConfFromByteString(conf, mrUserPayload.getConfigurationBytes()); this.jobConf = new JobConf(conf); useNewApi = this.jobConf.getUseNewMapper(); if (isGrouped) { diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRInputUtils.java 
b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRInputUtils.java index bc96e388da..e3c4f0e3a3 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRInputUtils.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRInputUtils.java @@ -43,15 +43,15 @@ * Helper methods for InputFormat based Inputs. Private to Tez. */ @Private -public class MRInputUtils { +public final class MRInputUtils { private static final Logger LOG = LoggerFactory.getLogger(MRInputUtils.class); - public static TaskSplitMetaInfo[] readSplits(Configuration conf) throws IOException { - TaskSplitMetaInfo[] allTaskSplitMetaInfo; - allTaskSplitMetaInfo = SplitMetaInfoReaderTez - .readSplitMetaInfo(conf, FileSystem.getLocal(conf)); - return allTaskSplitMetaInfo; + private MRInputUtils() {} + + public static TaskSplitMetaInfo getSplits(Configuration conf, int index) throws IOException { + return SplitMetaInfoReaderTez + .getSplitMetaInfo(conf, FileSystem.getLocal(conf), index); } public static org.apache.hadoop.mapreduce.InputSplit getNewSplitDetailsFromEvent( @@ -79,12 +79,10 @@ public static org.apache.hadoop.mapreduce.InputSplit getNewSplitDetailsFromDisk( try { cls = (Class) jobConf.getClassByName(className); } catch (ClassNotFoundException ce) { - IOException wrap = new IOException("Split class " + className + " not found"); - wrap.initCause(ce); - throw wrap; + throw new IOException("Split class " + className + " not found", ce); } SerializationFactory factory = new SerializationFactory(jobConf); - Deserializer deserializer = (Deserializer) factory + Deserializer deserializer = factory .getDeserializer(cls); deserializer.open(inFile); org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null); @@ -112,12 +110,10 @@ public static InputSplit getOldSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo try { cls = (Class) jobConf.getClassByName(className); } catch (ClassNotFoundException ce) { - IOException wrap = new IOException("Split class " + className + " not found"); - wrap.initCause(ce); - throw wrap; + throw new IOException("Split class " + className + " not found", ce); } SerializationFactory factory = new SerializationFactory(jobConf); - Deserializer deserializer = (Deserializer) factory + Deserializer deserializer = factory .getDeserializer(cls); deserializer.open(inFile); org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null); diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java index 10b871edcc..ad3d4d67ab 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapReduce.java @@ -19,6 +19,7 @@ package org.apache.tez.mapreduce.lib; import java.io.IOException; +import java.util.Objects; import org.apache.tez.runtime.api.InputContext; import org.apache.tez.runtime.library.api.IOInterruptedException; @@ -34,8 +35,6 @@ import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.RecordReader; -import com.google.common.base.Preconditions; - public class MRReaderMapReduce extends MRReader { private static final Logger LOG = LoggerFactory.getLogger(MRReader.class); @@ -151,7 +150,7 @@ public Object getCurrentValue() throws IOException { } private void setupNewRecordReader() throws IOException { - Preconditions.checkNotNull(inputSplit, "Input split hasn't yet been setup"); + 
Objects.requireNonNull(inputSplit, "Input split hasn't yet been setup"); try { recordReader = inputFormat.createRecordReader(inputSplit, taskAttemptContext); recordReader.initialize(inputSplit, taskAttemptContext); diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java index d81debb732..e04ae7f046 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/lib/MRReaderMapred.java @@ -19,6 +19,7 @@ package org.apache.tez.mapreduce.lib; import java.io.IOException; +import java.util.Objects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,14 +30,13 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.JobContext; import org.apache.hadoop.mapred.RecordReader; +import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.common.counters.TezCounter; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.mapreduce.hadoop.mapred.MRReporter; import org.apache.tez.mapreduce.input.MRInput; import org.apache.tez.runtime.api.InputContext; -import com.google.common.base.Preconditions; - public class MRReaderMapred extends MRReader { private static final Logger LOG = LoggerFactory.getLogger(MRReaderMapred.class); @@ -146,6 +146,13 @@ public Object getCurrentValue() throws IOException { * @return the additional fields set by {@link MRInput} */ public Configuration getConfigUpdates() { + String propertyList = jobConf.get(TezConfiguration.TEZ_MRREADER_CONFIG_UPDATE_PROPERTIES); + if (propertyList != null) { + String[] properties = propertyList.split(","); + for (String prop : properties) { + addToIncrementalConfFromJobConf(prop); + } + } if (incrementalConf != null) { return new Configuration(incrementalConf); } @@ -153,7 +160,7 @@ public Configuration getConfigUpdates() { } private void setupOldRecordReader() throws IOException { - Preconditions.checkNotNull(inputSplit, "Input split hasn't yet been setup"); + Objects.requireNonNull(inputSplit, "Input split hasn't yet been setup"); recordReader = inputFormat.getRecordReader(inputSplit, this.jobConf, new MRReporter( tezCounters, inputSplit)); setIncrementalConfigParams(inputSplit); @@ -162,15 +169,24 @@ private void setupOldRecordReader() throws IOException { setupComplete = true; } - private void setIncrementalConfigParams(InputSplit inputSplit) { - if (inputSplit instanceof FileSplit) { - FileSplit fileSplit = (FileSplit) inputSplit; + private void setIncrementalConfigParams(InputSplit split) { + if (split instanceof FileSplit) { + FileSplit fileSplit = (FileSplit) split; this.incrementalConf = new Configuration(false); this.incrementalConf.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString()); this.incrementalConf.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart()); this.incrementalConf.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength()); } - LOG.info("Processing split: " + inputSplit); + LOG.info("Processing split: " + split); + } + + private void addToIncrementalConfFromJobConf(String property) { + if (jobConf.get(property) != null) { + if (incrementalConf == null) { + incrementalConf = new Configuration(false); + } + incrementalConf.set(property, jobConf.get(property)); + } } } diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java index 6ed70c5e73..b8ac1b3a54 
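getConfigUpdates() in MRReaderMapred now also honors TEZ_MRREADER_CONFIG_UPDATE_PROPERTIES, a comma-separated list of jobConf keys to surface to the caller as incremental updates. Hypothetical usage, mirroring the TestKVReadersWithMR cases later in this patch (the column.* keys are made up for illustration):

    jobConf.set(TezConfiguration.TEZ_MRREADER_CONFIG_UPDATE_PROPERTIES,
        "column.names,column.ids");
    jobConf.set("column.names", "first_name,last_name,id");
    jobConf.set("column.ids", "1,2,3");
    jobConf.set("random", "value");

    // Assuming "reader" is an MRReaderMapred built over this jobConf:
    Configuration updates = reader.getConfigUpdates();
    // updates contains column.names and column.ids only; listed-but-unset keys
    // and unlisted keys such as "random" are skipped.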
100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/output/MROutput.java @@ -29,9 +29,11 @@ import java.util.concurrent.atomic.AtomicBoolean; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import com.google.protobuf.ByteString; +import org.apache.tez.common.Preconditions; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat; +import org.apache.tez.mapreduce.common.Utils; import org.apache.tez.runtime.library.api.IOInterruptedException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -398,8 +400,9 @@ protected List initializeBase() throws IOException, InterruptedException taskNumberFormat.setGroupingUsed(false); nonTaskNumberFormat.setMinimumIntegerDigits(3); nonTaskNumberFormat.setGroupingUsed(false); - Configuration conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); - this.jobConf = new JobConf(conf); + UserPayload userPayload = getContext().getUserPayload(); + this.jobConf = new JobConf(getContext().getContainerConfiguration()); + TezUtils.addToConfFromByteString(this.jobConf, ByteString.copyFrom(userPayload.getPayload())); // Add tokens to the jobConf - in case they are accessed within the RW / OF jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials()); this.isMapperOutput = jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, @@ -411,6 +414,7 @@ protected List initializeBase() throws IOException, InterruptedException } jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber()); + jobConf.set(MRJobConfig.JOB_COMMITTER_UUID, Utils.getDAGID(getContext())); TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl .createMockTaskAttemptID(getContext().getApplicationId().getClusterTimestamp(), getContext().getTaskVertexIndex(), getContext().getApplicationId().getId(), @@ -456,7 +460,7 @@ protected List initializeBase() throws IOException, InterruptedException initCommitter(jobConf, useNewApi); } - LOG.info(getContext().getDestinationVertexName() + ": " + LOG.info(getContext().getInputOutputVertexNames() + ": " + "outputFormat=" + outputFormatClassName + ", using newmapreduce API=" + useNewApi); return null; @@ -486,9 +490,7 @@ public void initCommitter(JobConf job, boolean useNewApi) throws IOException, InterruptedException { if (useNewApi) { - if (LOG.isDebugEnabled()) { - LOG.debug("using new api for output committer"); - } + LOG.debug("using new api for output committer"); this.committer = newOutputFormat.getOutputCommitter( newApiTaskAttemptContext); @@ -576,7 +578,7 @@ public void handleEvents(List outputEvents) { @Override public synchronized List close() throws IOException { flush(); - LOG.info(getContext().getDestinationVertexName() + " closed"); + LOG.info(getContext().getInputOutputVertexNames() + " closed"); long outputRecords = getContext().getCounters() .findCounter(TaskCounter.OUTPUT_RECORDS).getValue(); getContext().getStatisticsReporter().reportItemsProcessed(outputRecords); diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/MRTask.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/MRTask.java index b79f19cfa0..55e5709c3b 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/MRTask.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/MRTask.java @@ -36,7 +36,6 @@ 
import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DataInputBuffer; @@ -321,8 +320,7 @@ public void initTask(LogicalOutput output) throws IOException, } this.mrReporter = new MRTaskReporter(processorContext); this.useNewApi = jobConf.getUseNewMapper(); - TezDAGID dagId = IDConverter.fromMRTaskAttemptId(taskAttemptId).getTaskID() - .getVertexID().getDAGId(); + TezDAGID dagId = IDConverter.fromMRTaskAttemptId(taskAttemptId).getDAGID(); this.jobContext = new JobContextImpl(jobConf, dagId, mrReporter); this.taskAttemptContext = @@ -509,10 +507,7 @@ public void close() throws IOException { comparator, keyClass, valueClass); - if (LOG.isDebugEnabled()) { - LOG.debug("Using key class: " + keyClass - + ", valueClass: " + valueClass); - } + LOG.debug("Using key class: {}, valueClass: {}", keyClass, valueClass); org.apache.hadoop.mapreduce.Reducer.Context reducerContext = diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/MRTaskReporter.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/MRTaskReporter.java index 2fa75bf6f4..e3fdc27a93 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/MRTaskReporter.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/MRTaskReporter.java @@ -23,6 +23,7 @@ import org.apache.hadoop.mapred.Counters; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.Reporter; +import org.apache.tez.common.ProgressHelper; import org.apache.tez.common.counters.TezCounter; import org.apache.tez.mapreduce.hadoop.mapred.MRCounters; import org.apache.tez.mapreduce.hadoop.mapred.MRReporter; @@ -62,6 +63,9 @@ public MRTaskReporter(InputContext context) { } public void setProgress(float progress) { + // Validate that the progress is within the valid range. This guarantees + // that reporter and processorContext get the same value. 
+ progress = ProgressHelper.processProgress(progress); reporter.setProgress(progress); if (isProcessorContext) { ((ProcessorContext)context).setProgress(progress); diff --git a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/reduce/ReduceProcessor.java b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/reduce/ReduceProcessor.java index 4b79c78c45..63b168f267 100644 --- a/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/reduce/ReduceProcessor.java +++ b/tez-mapreduce/src/main/java/org/apache/tez/mapreduce/processor/reduce/ReduceProcessor.java @@ -309,6 +309,12 @@ public boolean isSameKey() throws IOException { public DataInputBuffer getValue() throws IOException { return rawIter.getValue(); } + + @Override + public boolean hasNext() throws IOException { + return rawIter.hasNext(); + } + public boolean next() throws IOException { boolean ret = rawIter.next(); reporter.setProgress(rawIter.getProgress().getProgress()); diff --git a/tez-mapreduce/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-mapreduce/src/main/javadoc/resources/META-INF/LICENSE similarity index 100% rename from tez-mapreduce/src/main/javadoc/resources/META-INF/LICENSE.txt rename to tez-mapreduce/src/main/javadoc/resources/META-INF/LICENSE diff --git a/tez-mapreduce/src/main/javadoc/resources/META-INF/NOTICE b/tez-mapreduce/src/main/javadoc/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-mapreduce/src/main/javadoc/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-mapreduce/src/main/javadoc/resources/META-INF/NOTICE.txt b/tez-mapreduce/src/main/javadoc/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-mapreduce/src/main/javadoc/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-mapreduce/src/main/resources/META-INF/LICENSE.txt b/tez-mapreduce/src/main/resources/META-INF/LICENSE similarity index 100% rename from tez-mapreduce/src/main/resources/META-INF/LICENSE.txt rename to tez-mapreduce/src/main/resources/META-INF/LICENSE diff --git a/tez-mapreduce/src/main/resources/META-INF/NOTICE b/tez-mapreduce/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-mapreduce/src/main/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-mapreduce/src/main/resources/META-INF/NOTICE.txt b/tez-mapreduce/src/main/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-mapreduce/src/main/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
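ProgressHelper.processProgress, called in setProgress above, is not part of this diff. A plausible sketch that satisfies the new comment (normalize the value so reporter and processorContext observe the same, in-range progress) would be:

    // Assumption about org.apache.tez.common.ProgressHelper: map NaN to 0 and
    // clamp everything else into [0.0, 1.0]; the real implementation may differ.
    public static float processProgress(float progress) {
      return Float.isNaN(progress)
          ? 0.0f
          : Math.max(0.0f, Math.min(1.0f, progress));
    }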
- diff --git a/tez-mapreduce/src/test/java/org/apache/hadoop/mapred/split/TestGroupedSplits.java b/tez-mapreduce/src/test/java/org/apache/hadoop/mapred/split/TestGroupedSplits.java index 3dce417459..9d700652a7 100644 --- a/tez-mapreduce/src/test/java/org/apache/hadoop/mapred/split/TestGroupedSplits.java +++ b/tez-mapreduce/src/test/java/org/apache/hadoop/mapred/split/TestGroupedSplits.java @@ -325,7 +325,7 @@ public void testGroupedSplitSize() throws IOException { for (int i=0; i st throw new UnsupportedOperationException("getVertexNumTasks not implemented in this mock"); } + @Override + public void addCounters(TezCounters tezCounters) { + throw new UnsupportedOperationException("addCounters not implemented in this mock"); + } + @Override public UserPayload getUserPayload() { throw new UnsupportedOperationException("getUserPayload not implemented in this mock"); diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/combine/TestMRCombiner.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/combine/TestMRCombiner.java index a796e59326..19bb8a0941 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/combine/TestMRCombiner.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/combine/TestMRCombiner.java @@ -43,9 +43,12 @@ import org.apache.tez.runtime.library.common.sort.impl.IFile.Writer; import org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator; import org.junit.Test; -import org.mockito.Mockito; import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class TestMRCombiner { @@ -56,7 +59,7 @@ public void testRunOldCombiner() throws IOException, InterruptedException { conf.setClass("mapred.combiner.class", OldReducer.class, Object.class); TaskContext taskContext = getTaskContext(conf); MRCombiner combiner = new MRCombiner(taskContext); - Writer writer = Mockito.mock(Writer.class); + Writer writer = mock(Writer.class); combiner.combine(new TezRawKeyValueIteratorTest(), writer); long inputRecords = taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS).getValue(); long outputRecords = taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS).getValue(); @@ -75,7 +78,7 @@ public void testRunNewCombiner() throws IOException, InterruptedException { Object.class); TaskContext taskContext = getTaskContext(conf); MRCombiner combiner = new MRCombiner(taskContext); - Writer writer = Mockito.mock(Writer.class); + Writer writer = mock(Writer.class); combiner.combine(new TezRawKeyValueIteratorTest(), writer); long inputRecords = taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS).getValue(); long outputRecords = taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS).getValue(); @@ -92,7 +95,7 @@ public void testTop2RunOldCombiner() throws IOException, InterruptedException { conf.setClass("mapred.combiner.class", Top2OldReducer.class, Object.class); TaskContext taskContext = getTaskContext(conf); MRCombiner combiner = new MRCombiner(taskContext); - Writer writer = Mockito.mock(Writer.class); + Writer writer = mock(Writer.class); combiner.combine(new TezRawKeyValueIteratorTest(), writer); long inputRecords = taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS).getValue(); long outputRecords = taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS).getValue(); @@ -109,7 
+112,7 @@ public void testTop2RunNewCombiner() throws IOException, InterruptedException { Object.class); TaskContext taskContext = getTaskContext(conf); MRCombiner combiner = new MRCombiner(taskContext); - Writer writer = Mockito.mock(Writer.class); + Writer writer = mock(Writer.class); combiner.combine(new TezRawKeyValueIteratorTest(), writer); long inputRecords = taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS).getValue(); long outputRecords = taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS).getValue(); @@ -127,20 +130,20 @@ private void setKeyAndValueClassTypes(TezConfiguration conf) { private TaskContext getTaskContext(TezConfiguration conf) throws IOException { UserPayload payload = TezUtils.createUserPayloadFromConf(conf); - TaskContext taskContext = Mockito.mock(InputContext.class); - Mockito.when(taskContext.getUserPayload()).thenReturn(payload); - Mockito.when(taskContext.getCounters()).thenReturn(new TezCounters()); - Mockito.when(taskContext.getApplicationId()).thenReturn( + TaskContext taskContext = mock(InputContext.class); + when(taskContext.getUserPayload()).thenReturn(payload); + when(taskContext.getCounters()).thenReturn(new TezCounters()); + when(taskContext.getApplicationId()).thenReturn( ApplicationId.newInstance(123456, 1)); return taskContext; } private void verifyKeyAndValues(Writer writer) throws IOException { - Mockito.verify(writer, Mockito.atLeastOnce()).append(new Text("tez"), + verify(writer, atLeastOnce()).append(new Text("tez"), new IntWritable(3)); - Mockito.verify(writer, Mockito.atLeastOnce()).append(new Text("apache"), + verify(writer, atLeastOnce()).append(new Text("apache"), new IntWritable(1)); - Mockito.verify(writer, Mockito.atLeastOnce()).append(new Text("hadoop"), + verify(writer, atLeastOnce()).append(new Text("hadoop"), new IntWritable(2)); } @@ -152,7 +155,16 @@ private static class TezRawKeyValueIteratorTest implements @Override public boolean next() throws IOException { - if (i++ < keys.length - 1) { + boolean hasNext = hasNext(); + if (hasNext) { + i += 1; + } + + return hasNext; + } + + public boolean hasNext() throws IOException { + if (i < (keys.length - 1)) { return true; } return false; diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/common/TestMRInputAMSplitGenerator.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/common/TestMRInputAMSplitGenerator.java index 6cf2700564..9f6ac3b74f 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/common/TestMRInputAMSplitGenerator.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/common/TestMRInputAMSplitGenerator.java @@ -96,7 +96,7 @@ private void testGroupSplitsAndSortSplits(boolean groupSplitsEnabled, UserPayload userPayload = dataSource.getInputDescriptor().getUserPayload(); InputInitializerContext context = - new TezTestUtils.TezRootInputInitializerContextForTest(userPayload); + new TezTestUtils.TezRootInputInitializerContextForTest(userPayload, new Configuration(false)); MRInputAMSplitGenerator splitGenerator = new MRInputAMSplitGenerator(context); diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/common/TestMRInputSplitDistributor.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/common/TestMRInputSplitDistributor.java index 3772cde946..4aaa7e2e76 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/common/TestMRInputSplitDistributor.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/common/TestMRInputSplitDistributor.java @@ -70,7 
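The iterator test double above now routes next() through the new hasNext(), matching the hasNext() override added to the rawIter wrapper in ReduceProcessor earlier in this patch: callers can probe for remaining records without consuming one. The separation in isolation:

    // hasNext() is a side-effect-free query; next() advances only when an
    // element remains, and reports whether it did.
    public boolean hasNext() { return i < keys.length - 1; }
    public boolean next() {
      boolean has = hasNext();
      if (has) { i += 1; }
      return has;
    }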
+70,8 @@ public void testSerializedPayload() throws IOException { UserPayload userPayload = UserPayload.create(payloadProto.build().toByteString().asReadOnlyByteBuffer()); - InputInitializerContext context = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload); + InputInitializerContext context = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload, + new Configuration(false)); MRInputSplitDistributor splitDist = new MRInputSplitDistributor(context); List events = splitDist.initialize(); @@ -119,7 +120,8 @@ public void testDeserializedPayload() throws IOException { UserPayload userPayload = UserPayload.create(payloadProto.build().toByteString().asReadOnlyByteBuffer()); - InputInitializerContext context = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload); + InputInitializerContext context = new TezTestUtils.TezRootInputInitializerContextForTest(userPayload, + new Configuration(false)); MRInputSplitDistributor splitDist = new MRInputSplitDistributor(context); List events = splitDist.initialize(); diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/hadoop/TestConfigTranslationMRToTez.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/hadoop/TestConfigTranslationMRToTez.java index deab64feef..df68c8dff8 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/hadoop/TestConfigTranslationMRToTez.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/hadoop/TestConfigTranslationMRToTez.java @@ -70,6 +70,5 @@ public void testMRToTezKeyTranslation() { assertEquals(LongWritable.class.getName(), ConfigUtils .getIntermediateInputValueClass(confVertex1).getName()); assertTrue(ConfigUtils.shouldCompressIntermediateOutput(confVertex1)); - assertTrue(ConfigUtils.isIntermediateInputCompressed(confVertex1)); } } diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/hadoop/TestMRInputHelpers.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/hadoop/TestMRInputHelpers.java index 88cc4a51f1..a7501e8aed 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/hadoop/TestMRInputHelpers.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/hadoop/TestMRInputHelpers.java @@ -19,6 +19,7 @@ package org.apache.tez.mapreduce.hadoop; import java.io.IOException; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -29,6 +30,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; @@ -40,30 +42,39 @@ import org.apache.hadoop.mapreduce.split.JobSplit; import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader; import org.apache.hadoop.yarn.api.records.LocalResource; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.tez.dag.api.DataSourceDescriptor; import org.apache.tez.dag.api.TaskLocationHint; +import org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto; +import org.apache.tez.runtime.api.events.InputDataInformationEvent; +import org.junit.After; import org.junit.Assert; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import com.google.protobuf.ByteString; + public class TestMRInputHelpers { protected static MiniDFSCluster dfsCluster; private static Configuration conf = new Configuration(); private static FileSystem remoteFs; + 
private static LocalFileSystem localFs; private static Path testFilePath; private static Path oldSplitsDir; private static Path newSplitsDir; - private static String TEST_ROOT_DIR = "target" - + Path.SEPARATOR + TestMRHelpers.class.getName() + "-tmpDir"; + private static Path testRootDir; + private static Path localTestRootDir; @BeforeClass public static void setup() throws IOException { + testRootDir = new Path(Files.createTempDirectory(TestMRHelpers.class.getName()).toString()); + localTestRootDir = new Path(Files.createTempDirectory(TestMRHelpers.class.getName() + "-local").toString()); + try { - conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR); + conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testRootDir.toString()); dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(2) .format(true).racks(null).build(); remoteFs = dfsCluster.getFileSystem(); @@ -71,7 +82,7 @@ public static void setup() throws IOException { throw new RuntimeException("problem starting mini dfs cluster", io); } - Configuration testConf = new YarnConfiguration( + Configuration testConf = new Configuration( dfsCluster.getFileSystem().getConf()); @@ -99,6 +110,8 @@ public static void setup() throws IOException { oldSplitsDir = remoteFs.makeQualified(new Path("/tmp/splitsDirOld/")); newSplitsDir = remoteFs.makeQualified(new Path("/tmp/splitsDirNew/")); + + localFs = FileSystem.getLocal(conf); } @@ -189,6 +202,42 @@ public void testInputSplitLocalResourceCreation() throws Exception { MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)); } + @Test + public void testInputEventSerializedPayload() throws IOException { + MRSplitProto proto = MRSplitProto.newBuilder().setSplitBytes(ByteString.copyFrom("splits".getBytes())).build(); + + InputDataInformationEvent initEvent = + InputDataInformationEvent.createWithSerializedPayload(0, proto.toByteString().asReadOnlyByteBuffer()); + MRSplitProto protoFromEvent = MRInputHelpers.getProto(initEvent, new JobConf(conf)); + + Assert.assertEquals(proto, protoFromEvent); + } + + @Test + public void testInputEventSerializedPath() throws IOException { + MRSplitProto proto = MRSplitProto.newBuilder().setSplitBytes(ByteString.copyFrom("splits".getBytes())).build(); + + Path splitsDir = localFs.resolvePath(localTestRootDir); + + Path serializedPath = new Path(splitsDir + Path.SEPARATOR + "splitpayload"); + + try (FSDataOutputStream out = localFs.create(serializedPath)) { + proto.writeTo(out); + } + + // event file is present on fs + Assert.assertTrue("Event file should be present on fs", localFs.exists(serializedPath)); + + InputDataInformationEvent initEvent = + InputDataInformationEvent.createWithSerializedPath(0, serializedPath.toUri().toString()); + MRSplitProto protoFromEvent = MRInputHelpers.getProto(initEvent, new JobConf(conf)); + + Assert.assertEquals(proto, protoFromEvent); + + // event file is deleted after read + Assert.assertFalse("Event file should be deleted after read", localFs.exists(serializedPath)); + } + private void verifyLocationHints(Path inputSplitsDir, List actual) throws Exception { JobID jobId = new JobID("dummy", 1); @@ -232,31 +281,30 @@ private DataSourceDescriptor generateDataSourceDescriptorMapRed(Path inputSplits @Test(timeout = 5000) public void testInputSplitLocalResourceCreationWithDifferentFS() throws Exception { - FileSystem localFs = FileSystem.getLocal(conf); - Path LOCAL_TEST_ROOT_DIR = new Path("target" - + Path.SEPARATOR + TestMRHelpers.class.getName() + "-localtmpDir"); - - try { - localFs.mkdirs(LOCAL_TEST_ROOT_DIR); - - Path splitsDir = 
localFs.resolvePath(LOCAL_TEST_ROOT_DIR); + Path splitsDir = localFs.resolvePath(localTestRootDir); - DataSourceDescriptor dataSource = generateDataSourceDescriptorMapRed(splitsDir); + DataSourceDescriptor dataSource = generateDataSourceDescriptorMapRed(splitsDir); - Map localResources = dataSource.getAdditionalLocalFiles(); + Map localResources = dataSource.getAdditionalLocalFiles(); - Assert.assertEquals(2, localResources.size()); - Assert.assertTrue(localResources.containsKey( - MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)); - Assert.assertTrue(localResources.containsKey( - MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)); + Assert.assertEquals(2, localResources.size()); + Assert.assertTrue(localResources.containsKey( + MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)); + Assert.assertTrue(localResources.containsKey( + MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)); - for (LocalResource lr : localResources.values()) { - Assert.assertFalse(lr.getResource().getScheme().contains(remoteFs.getScheme())); - } - } finally { - localFs.delete(LOCAL_TEST_ROOT_DIR, true); + for (LocalResource lr : localResources.values()) { + Assert.assertFalse(lr.getResource().getScheme().contains(remoteFs.getScheme())); } } + @Before + public void before() throws IOException { + localFs.mkdirs(localTestRootDir); + } + + @After + public void after() throws IOException { + localFs.delete(localTestRootDir, true); + } } diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/MRInputForTest.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/MRInputForTest.java new file mode 100644 index 0000000000..0d1d24ff6f --- /dev/null +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/MRInputForTest.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.mapreduce.input; + +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.runtime.api.InputContext; + +/** + * This is used for inspecting jobConf in test. + */ +public class MRInputForTest extends MRInput { + public MRInputForTest(InputContext inputContext, int numPhysicalInputs) { + super(inputContext, numPhysicalInputs); + } + + public Configuration getConfiguration() { + return jobConf; + } +} diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/MultiMRInputForTest.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/MultiMRInputForTest.java new file mode 100644 index 0000000000..f0f0a77aa7 --- /dev/null +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/MultiMRInputForTest.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.mapreduce.input; + +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.runtime.api.InputContext; + +/** + * This is used for inspecting jobConf in test. + */ +public class MultiMRInputForTest extends MultiMRInput { + public MultiMRInputForTest(InputContext inputContext, int numPhysicalInputs) { + super(inputContext, numPhysicalInputs); + } + + public Configuration getConfiguration() { + return jobConf; + } +} diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMRInput.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMRInput.java index 9109cd9c47..844ea51cf1 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMRInput.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMRInput.java @@ -47,6 +47,7 @@ import org.apache.tez.mapreduce.protos.MRRuntimeProtos; import org.apache.tez.runtime.api.Event; import org.apache.tez.runtime.api.InputContext; +import org.apache.tez.runtime.api.InputStatisticsReporter; import org.apache.tez.runtime.api.events.InputDataInformationEvent; import org.junit.Test; @@ -69,6 +70,7 @@ public void test0PhysicalInputs() throws IOException { doReturn(1).when(inputContext).getTaskIndex(); doReturn(1).when(inputContext).getTaskAttemptNumber(); doReturn(new TezCounters()).when(inputContext).getCounters(); + doReturn(new JobConf(false)).when(inputContext).getContainerConfiguration(); MRInput mrInput = new MRInput(inputContext, 0); @@ -120,6 +122,7 @@ public void testAttributesInJobConf() throws Exception { doReturn(TEST_ATTRIBUTES_INPUT_NAME).when(inputContext).getSourceVertexName(); doReturn(TEST_ATTRIBUTES_APPLICATION_ID).when(inputContext).getApplicationId(); doReturn(TEST_ATTRIBUTES_UNIQUE_IDENTIFIER).when(inputContext).getUniqueIdentifier(); + doReturn(new Configuration(false)).when(inputContext).getContainerConfiguration(); DataSourceDescriptor dsd = MRInput.createConfigBuilder(new Configuration(false), @@ -147,6 +150,53 @@ public void testAttributesInJobConf() throws Exception { assertTrue(TestInputFormat.invoked.get()); } + @Test(timeout = 5000) + public void testConfigMerge() throws Exception { + JobConf jobConf = new JobConf(false); + jobConf.set("payload-key", "payload-value"); + + Configuration localConfig = new Configuration(false); + localConfig.set("local-key", "local-value"); + + InputContext inputContext = mock(InputContext.class); + + DataSourceDescriptor dsd = MRInput.createConfigBuilder(jobConf, + TestInputFormat.class).groupSplits(false).build(); + + doReturn(dsd.getInputDescriptor().getUserPayload()).when(inputContext).getUserPayload(); + doReturn(TEST_ATTRIBUTES_DAG_INDEX).when(inputContext).getDagIdentifier(); + doReturn(TEST_ATTRIBUTES_VERTEX_INDEX).when(inputContext).getTaskVertexIndex(); + 
doReturn(TEST_ATTRIBUTES_TASK_INDEX).when(inputContext).getTaskIndex(); + doReturn(TEST_ATTRIBUTES_TASK_ATTEMPT_INDEX).when(inputContext).getTaskAttemptNumber(); + doReturn(TEST_ATTRIBUTES_INPUT_INDEX).when(inputContext).getInputIndex(); + doReturn(TEST_ATTRIBUTES_DAG_ATTEMPT_NUMBER).when(inputContext).getDAGAttemptNumber(); + doReturn(TEST_ATTRIBUTES_DAG_NAME).when(inputContext).getDAGName(); + doReturn(TEST_ATTRIBUTES_VERTEX_NAME).when(inputContext).getTaskVertexName(); + doReturn(TEST_ATTRIBUTES_INPUT_NAME).when(inputContext).getSourceVertexName(); + doReturn(TEST_ATTRIBUTES_APPLICATION_ID).when(inputContext).getApplicationId(); + doReturn(TEST_ATTRIBUTES_UNIQUE_IDENTIFIER).when(inputContext).getUniqueIdentifier(); + doReturn(localConfig).when(inputContext).getContainerConfiguration(); + doReturn(new TezCounters()).when(inputContext).getCounters(); + + MRInputForTest input = new MRInputForTest(inputContext, 1); + input.initialize(); + + Configuration mergedConfig = input.getConfiguration(); + + assertEquals("local-value", mergedConfig.get("local-key")); + assertEquals("payload-value", mergedConfig.get("payload-key")); + } + + @Test + public void testMRInputCloseWithUnintializedReader() throws IOException { + InputContext inputContext = mock(InputContext.class); + doReturn(new TezCounters()).when(inputContext).getCounters(); + doReturn(new InputStatisticsReporterImplForTest()).when(inputContext).getStatisticsReporter(); + + MRInput mrInput = new MRInput(inputContext, 0); + mrInput.close(); // shouldn't throw NPE + } + /** * Test class to verify */ @@ -237,4 +287,15 @@ public void readFields(DataInput in) throws IOException { } } + + public static class InputStatisticsReporterImplForTest implements InputStatisticsReporter { + + @Override + public synchronized void reportDataSize(long size) { + } + + @Override + public void reportItemsProcessed(long items) { + } + } } diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMultiMRInput.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMultiMRInput.java index 8d77a0539b..bd6e891bd2 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMultiMRInput.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/input/TestMultiMRInput.java @@ -102,7 +102,7 @@ public void test0PhysicalInputs() throws Exception { jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class); FileInputFormat.setInputPaths(jobConf, workDir); - InputContext inputContext = createTezInputContext(jobConf); + InputContext inputContext = createTezInputContext(jobConf, new Configuration(false)); MultiMRInput mMrInput = new MultiMRInput(inputContext, 0); @@ -121,6 +121,25 @@ public void test0PhysicalInputs() throws Exception { } } + @Test(timeout = 5000) + public void testConfigMerge() throws Exception { + JobConf jobConf = new JobConf(false); + jobConf.set("payload-key", "payload-value"); + + Configuration localConfig = new Configuration(false); + localConfig.set("local-key", "local-value"); + + InputContext inputContext = createTezInputContext(jobConf, localConfig); + + MultiMRInputForTest input = new MultiMRInputForTest(inputContext, 1); + input.initialize(); + + Configuration mergedConfig = input.getConfiguration(); + + assertEquals("local-value", mergedConfig.get("local-key")); + assertEquals("payload-value", mergedConfig.get("payload-key")); + } + @Test(timeout = 5000) public void testSingleSplit() throws Exception { @@ -129,7 +148,7 @@ public void testSingleSplit() throws Exception { 
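Taken together, the two testConfigMerge cases pin down the layering introduced in MRInputBase and MROutput in this patch: the per-container configuration is the base and the serialized payload is applied on top, so a payload key wins if both layers define it. Reduced to its core:

    // Base layer: container-wide Configuration; overlay: job-specific payload.
    JobConf merged = new JobConf(inputContext.getContainerConfiguration());
    TezUtils.addToConfFromByteString(merged, mrUserPayload.getConfigurationBytes());
    // merged.get("local-key")   -> "local-value"   (from the container layer)
    // merged.get("payload-key") -> "payload-value" (from the payload overlay)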
jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class); FileInputFormat.setInputPaths(jobConf, workDir); - InputContext inputContext = createTezInputContext(jobConf); + InputContext inputContext = createTezInputContext(jobConf, new Configuration(false)); MultiMRInput input = new MultiMRInput(inputContext, 1); input.initialize(); @@ -180,7 +199,7 @@ public void testNewFormatSplits() throws Exception { splitProto.toByteString().asReadOnlyByteBuffer()); // Create input context. - InputContext inputContext = createTezInputContext(conf); + InputContext inputContext = createTezInputContext(conf, new Configuration(false)); // Create the MR input object and process the event MultiMRInput input = new MultiMRInput(inputContext, 1); @@ -198,7 +217,7 @@ public void testMultipleSplits() throws Exception { jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class); FileInputFormat.setInputPaths(jobConf, workDir); - InputContext inputContext = createTezInputContext(jobConf); + InputContext inputContext = createTezInputContext(jobConf, new Configuration(false)); MultiMRInput input = new MultiMRInput(inputContext, 2); input.initialize(); @@ -265,7 +284,7 @@ public void testExtraEvents() throws Exception { jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class); FileInputFormat.setInputPaths(jobConf, workDir); - InputContext inputContext = createTezInputContext(jobConf); + InputContext inputContext = createTezInputContext(jobConf, new Configuration(false)); MultiMRInput input = new MultiMRInput(inputContext, 1); input.initialize(); @@ -308,10 +327,10 @@ private LinkedHashMap createSplits(int splitCount, Path work return data; } - private InputContext createTezInputContext(Configuration conf) throws Exception { + private InputContext createTezInputContext(Configuration payloadConf, Configuration baseConf) throws Exception { MRInputUserPayloadProto.Builder builder = MRInputUserPayloadProto.newBuilder(); builder.setGroupingEnabled(false); - builder.setConfigurationBytes(TezUtils.createByteStringFromConf(conf)); + builder.setConfigurationBytes(TezUtils.createByteStringFromConf(payloadConf)); byte[] payload = builder.build().toByteArray(); ApplicationId applicationId = ApplicationId.newInstance(10000, 1); @@ -330,6 +349,7 @@ private InputContext createTezInputContext(Configuration conf) throws Exception doReturn(UUID.randomUUID().toString()).when(inputContext).getUniqueIdentifier(); doReturn("taskVertexName").when(inputContext).getTaskVertexName(); doReturn(UserPayload.create(ByteBuffer.wrap(payload))).when(inputContext).getUserPayload(); + doReturn(baseConf).when(inputContext).getContainerConfiguration(); return inputContext; } diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/lib/TestKVReadersWithMR.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/lib/TestKVReadersWithMR.java index dad18deeca..21a92469e2 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/lib/TestKVReadersWithMR.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/lib/TestKVReadersWithMR.java @@ -18,6 +18,7 @@ package org.apache.tez.mapreduce.lib; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapreduce.InputSplit; @@ -25,12 +26,15 @@ import org.apache.tez.common.counters.TaskCounter; import org.apache.tez.common.counters.TezCounter; import org.apache.tez.common.counters.TezCounters; +import 
org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.runtime.api.InputContext; import org.junit.Before; import org.junit.Test; import java.io.IOException; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.Mockito.mock; @@ -107,6 +111,47 @@ public void testWithSpecificNumberOfKV_MapReduce(int kvPairs) throws IOException } } + @Test + public void testIncrementalConfigWithMultipleProperties() throws IOException { + InputContext mockContext = mock(InputContext.class); + MRReaderMapred reader = new MRReaderMapred(conf, counters, inputRecordCounter, mockContext); + conf.set(TezConfiguration.TEZ_MRREADER_CONFIG_UPDATE_PROPERTIES, "column.names,does_not_exist,column.ids"); + conf.set("column.names", "first_name,last_name,id"); + conf.set("column.ids", "1,2,3"); + conf.set("random", "value"); + + Configuration incrementalConf = reader.getConfigUpdates(); + + assertEquals(2, incrementalConf.size()); + assertEquals("first_name,last_name,id", incrementalConf.get("column.names")); + assertEquals("1,2,3", incrementalConf.get("column.ids")); + } + + @Test + public void testIncrementalConfigWithSingleProperty() throws IOException { + InputContext mockContext = mock(InputContext.class); + MRReaderMapred reader = new MRReaderMapred(conf, counters, inputRecordCounter, mockContext); + conf.set(TezConfiguration.TEZ_MRREADER_CONFIG_UPDATE_PROPERTIES, "column.names"); + conf.set("column.names", "first_name,last_name,id"); + conf.set("random", "value"); + + Configuration incrementalConf = reader.getConfigUpdates(); + + assertEquals(1, incrementalConf.size()); + assertEquals("first_name,last_name,id", incrementalConf.get("column.names")); + } + + @Test + public void testIncrementalConfigWithZeroProperty() throws IOException { + InputContext mockContext = mock(InputContext.class); + MRReaderMapred reader = new MRReaderMapred(conf, counters, inputRecordCounter, mockContext); + conf.set("random", "value"); + + Configuration incrementalConf = reader.getConfigUpdates(); + + assertNull(incrementalConf); + } + static class DummyRecordReader implements RecordReader { int records; diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMROutput.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMROutput.java index f3403e655c..3359a6eda2 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMROutput.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMROutput.java @@ -28,6 +28,7 @@ import java.util.HashMap; import java.util.List; +import com.google.common.io.Files; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -55,7 +56,9 @@ import org.apache.tez.hadoop.shim.DefaultHadoopShim; import org.apache.tez.mapreduce.TestUmbilical; import org.apache.tez.mapreduce.TezTestUtils; +import org.apache.tez.mapreduce.common.Utils; import org.apache.tez.mapreduce.hadoop.MRConfig; +import org.apache.tez.mapreduce.hadoop.MRJobConfig; import org.apache.tez.runtime.LogicalIOProcessorRuntimeTask; import org.apache.tez.runtime.api.OutputContext; import org.apache.tez.runtime.api.ProcessorContext; @@ -66,6 +69,7 @@ import org.apache.tez.runtime.api.impl.TezUmbilical; import org.apache.tez.runtime.library.api.KeyValueWriter; import org.apache.tez.runtime.library.processor.SimpleProcessor; +import org.junit.BeforeClass; import 
org.junit.Ignore; import org.junit.Test; @@ -75,16 +79,25 @@ public class TestMROutput { + static File tmpDir; + + @BeforeClass + public static void setupClass () { + tmpDir = Files.createTempDir(); + tmpDir.deleteOnExit(); + } + @Test(timeout = 5000) public void testNewAPI_TextOutputFormat() throws Exception { - String outputPath = "/tmp/output"; Configuration conf = new Configuration(); conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true); DataSinkDescriptor dataSink = MROutput - .createConfigBuilder(conf, TextOutputFormat.class, outputPath) + .createConfigBuilder(conf, TextOutputFormat.class, + tmpDir.getPath()) .build(); - OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload()); + OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload(), + new Configuration(false)); MROutput output = new MROutput(outputContext, 2); output.initialize(); @@ -99,16 +112,59 @@ public void testNewAPI_TextOutputFormat() throws Exception { assertEquals(FileOutputCommitter.class, output.committer.getClass()); } + @Test + public void testMergeConfig() throws Exception { + String outputPath = "/tmp/output"; + Configuration localConf = new Configuration(false); + localConf.set("local-key", "local-value"); + DataSinkDescriptor dataSink = MROutput + .createConfigBuilder(localConf, org.apache.hadoop.mapred.TextOutputFormat.class, outputPath) + .build(); + + Configuration baseConf = new Configuration(false); + baseConf.set("base-key", "base-value"); + + OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload(), baseConf); + MROutput output = new MROutput(outputContext, 2); + output.initialize(); + + Configuration mergedConf = output.jobConf; + assertEquals("local-value", mergedConf.get("local-key")); + assertEquals("base-value", mergedConf.get("base-key")); + } + + @Test + public void testJobUUIDSet() throws Exception { + Configuration conf = new Configuration(); + conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true); + DataSinkDescriptor dataSink = MROutput + .createConfigBuilder(conf, TextOutputFormat.class, + tmpDir.getPath()) + .build(); + + OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload(), + new Configuration(false)); + MROutput output = new MROutput(outputContext, 2); + output.initialize(); + String invalidDAGID = "invalid default"; + String dagID = output.jobConf.get(MRJobConfig.JOB_COMMITTER_UUID, invalidDAGID); + assertNotEquals(dagID, invalidDAGID); + assertNotEquals(output.jobConf.get(org.apache.hadoop.mapred.JobContext.TASK_ATTEMPT_ID), dagID); + assertEquals(dagID, Utils.getDAGID(outputContext)); + } + @Test(timeout = 5000) public void testOldAPI_TextOutputFormat() throws Exception { - String outputPath = "/tmp/output"; Configuration conf = new Configuration(); conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false); DataSinkDescriptor dataSink = MROutput - .createConfigBuilder(conf, org.apache.hadoop.mapred.TextOutputFormat.class, outputPath) + .createConfigBuilder(conf, + org.apache.hadoop.mapred.TextOutputFormat.class, + tmpDir.getPath()) .build(); - OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload()); + OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload(), + new Configuration(false)); MROutput output = new MROutput(outputContext, 2); output.initialize(); @@ -125,15 +181,16 @@ public void testOldAPI_TextOutputFormat() throws 
Exception { @Test(timeout = 5000) public void testNewAPI_SequenceFileOutputFormat() throws Exception { - String outputPath = "/tmp/output"; JobConf conf = new JobConf(); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(Text.class); DataSinkDescriptor dataSink = MROutput - .createConfigBuilder(conf, SequenceFileOutputFormat.class, outputPath) + .createConfigBuilder(conf, SequenceFileOutputFormat.class, + tmpDir.getPath()) .build(); - OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload()); + OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload(), + new Configuration(false)); MROutput output = new MROutput(outputContext, 2); output.initialize(); assertEquals(true, output.useNewApi); @@ -149,15 +206,17 @@ public void testNewAPI_SequenceFileOutputFormat() throws Exception { @Test(timeout = 5000) public void testOldAPI_SequenceFileOutputFormat() throws Exception { - String outputPath = "/tmp/output"; JobConf conf = new JobConf(); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(Text.class); DataSinkDescriptor dataSink = MROutput - .createConfigBuilder(conf, org.apache.hadoop.mapred.SequenceFileOutputFormat.class, outputPath) + .createConfigBuilder(conf, + org.apache.hadoop.mapred.SequenceFileOutputFormat.class, + tmpDir.getPath()) .build(); - OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload()); + OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload(), + new Configuration(false)); MROutput output = new MROutput(outputContext, 2); output.initialize(); assertEquals(false, output.useNewApi); @@ -175,14 +234,15 @@ public void testOldAPI_SequenceFileOutputFormat() throws Exception { // set while creating recordWriters @Test(timeout = 5000) public void testNewAPI_WorkOutputPathOutputFormat() throws Exception { - String outputPath = "/tmp/output"; Configuration conf = new Configuration(); conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true); DataSinkDescriptor dataSink = MROutput - .createConfigBuilder(conf, NewAPI_WorkOutputPathReadingOutputFormat.class, outputPath) + .createConfigBuilder(conf, NewAPI_WorkOutputPathReadingOutputFormat.class, + tmpDir.getPath()) .build(); - OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload()); + OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload(), + new Configuration(false)); MROutput output = new MROutput(outputContext, 2); output.initialize(); @@ -201,14 +261,15 @@ public void testNewAPI_WorkOutputPathOutputFormat() throws Exception { // set while creating recordWriters @Test(timeout = 5000) public void testOldAPI_WorkOutputPathOutputFormat() throws Exception { - String outputPath = "/tmp/output"; Configuration conf = new Configuration(); conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false); DataSinkDescriptor dataSink = MROutput - .createConfigBuilder(conf, OldAPI_WorkOutputPathReadingOutputFormat.class, outputPath) + .createConfigBuilder(conf, OldAPI_WorkOutputPathReadingOutputFormat.class, + tmpDir.getPath()) .build(); - OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload()); + OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload(), + new Configuration(false)); MROutput output = new MROutput(outputContext, 2); output.initialize(); @@ -223,7 +284,7 @@ 
public void testOldAPI_WorkOutputPathOutputFormat() throws Exception { assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass()); } - private OutputContext createMockOutputContext(UserPayload payload) { + private OutputContext createMockOutputContext(UserPayload payload, Configuration baseConf) { OutputContext outputContext = mock(OutputContext.class); ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1); when(outputContext.getUserPayload()).thenReturn(payload); @@ -231,6 +292,7 @@ private OutputContext createMockOutputContext(UserPayload payload) { when(outputContext.getTaskVertexIndex()).thenReturn(1); when(outputContext.getTaskAttemptNumber()).thenReturn(1); when(outputContext.getCounters()).thenReturn(new TezCounters()); + when(outputContext.getContainerConfiguration()).thenReturn(baseConf); return outputContext; } diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMROutputLegacy.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMROutputLegacy.java index e4fa0ea9dc..60596be89d 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMROutputLegacy.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMROutputLegacy.java @@ -23,6 +23,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; @@ -42,13 +43,16 @@ import org.apache.tez.runtime.api.OutputContext; import org.junit.Test; +import java.io.File; public class TestMROutputLegacy { + private static final File TEST_DIR = new File(System.getProperty("test.build.data"), + TestMROutputLegacy.class.getName()).getAbsoluteFile(); // simulate the behavior of translating MR to DAG using MR old API @Test (timeout = 5000) public void testOldAPI_MR() throws Exception { - String outputPath = "/tmp/output"; + String outputPath = TEST_DIR.getAbsolutePath(); JobConf conf = new JobConf(); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(Text.class); @@ -79,7 +83,7 @@ public void testOldAPI_MR() throws Exception { // simulate the behavior of translating MR to DAG using MR new API @Test (timeout = 5000) public void testNewAPI_MR() throws Exception { - String outputPath = "/tmp/output"; + String outputPath = TEST_DIR.getAbsolutePath(); Job job = Job.getInstance(); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); @@ -111,7 +115,7 @@ public void testNewAPI_MR() throws Exception { // simulate the behavior of translating Mapper-only job to DAG using MR old API @Test (timeout = 5000) public void testOldAPI_MapperOnly() throws Exception { - String outputPath = "/tmp/output"; + String outputPath = TEST_DIR.getAbsolutePath(); JobConf conf = new JobConf(); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(Text.class); @@ -142,7 +146,7 @@ public void testOldAPI_MapperOnly() throws Exception { //simulate the behavior of translating mapper-only job to DAG using MR new API @Test (timeout = 5000) public void testNewAPI_MapperOnly() throws Exception { - String outputPath = "/tmp/output"; + String outputPath = TEST_DIR.getAbsolutePath(); Job job = Job.getInstance(); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); @@ -179,6 +183,7 @@ private OutputContext createMockOutputContext(UserPayload payload) { 
when(outputContext.getTaskVertexIndex()).thenReturn(1); when(outputContext.getTaskAttemptNumber()).thenReturn(1); when(outputContext.getCounters()).thenReturn(new TezCounters()); + when(outputContext.getContainerConfiguration()).thenReturn(new Configuration(false)); return outputContext; } } diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMultiMROutput.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMultiMROutput.java index 3618e408c0..2662827678 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMultiMROutput.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/output/TestMultiMROutput.java @@ -23,6 +23,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; @@ -39,10 +40,13 @@ import org.junit.Assert; import org.junit.Test; +import java.io.File; import java.io.IOException; public class TestMultiMROutput { + private static final File TEST_DIR = new File(System.getProperty("test.build.data"), + TestMultiMROutput.class.getName()).getAbsoluteFile(); @Test(timeout = 5000) public void testNewAPI_TextOutputFormat() throws Exception { @@ -104,6 +108,34 @@ public void testInvalidBasePath() throws Exception { } } + @Test + public void testMergeConf() throws Exception { + JobConf payloadConf = new JobConf(); + payloadConf.set("local-key", "local-value"); + DataSinkDescriptor dataSink = MultiMROutput.createConfigBuilder( + payloadConf, SequenceFileOutputFormat.class, "/output", false).build(); + + Configuration baseConf = new Configuration(false); + baseConf.set("base-key", "base-value"); + + OutputContext outputContext = mock(OutputContext.class); + ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1); + when(outputContext.getUserPayload()).thenReturn(dataSink.getOutputDescriptor().getUserPayload()); + when(outputContext.getApplicationId()).thenReturn(appId); + when(outputContext.getTaskVertexIndex()).thenReturn(1); + when(outputContext.getTaskAttemptNumber()).thenReturn(1); + when(outputContext.getCounters()).thenReturn(new TezCounters()); + when(outputContext.getStatisticsReporter()).thenReturn(mock(OutputStatisticsReporter.class)); + when(outputContext.getContainerConfiguration()).thenReturn(baseConf); + + MultiMROutput output = new MultiMROutput(outputContext, 2); + output.initialize(); + + Configuration mergedConf = output.jobConf; + assertEquals("base-value", mergedConf.get("base-key")); + assertEquals("local-value", mergedConf.get("local-key")); + } + private OutputContext createMockOutputContext(UserPayload payload) { OutputContext outputContext = mock(OutputContext.class); ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1); @@ -114,6 +146,7 @@ private OutputContext createMockOutputContext(UserPayload payload) { when(outputContext.getCounters()).thenReturn(new TezCounters()); when(outputContext.getStatisticsReporter()).thenReturn( mock(OutputStatisticsReporter.class)); + when(outputContext.getContainerConfiguration()).thenReturn(new Configuration(false)); return outputContext; } @@ -176,7 +209,7 @@ private void validate(boolean expectedUseNewAPIValue, Class outputFormat, private MultiMROutput createMROutputs(Class outputFormat, boolean isMapper, boolean useLazyOutputFormat) throws InterruptedException, IOException { - String outputPath = 
"/tmp/output"; + String outputPath = TEST_DIR.getAbsolutePath(); JobConf conf = new JobConf(); conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, isMapper); conf.setOutputKeyClass(Text.class); diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/processor/MapUtils.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/processor/MapUtils.java index 6bfc5badd5..ba1acdf981 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/processor/MapUtils.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/processor/MapUtils.java @@ -71,9 +71,11 @@ import com.google.common.collect.HashMultimap; -public class MapUtils { +public final class MapUtils { private static final Logger LOG = LoggerFactory.getLogger(MapUtils.class); + + private MapUtils() {} public static void configureLocalDirs(Configuration conf, String localDir) throws IOException { @@ -101,7 +103,7 @@ public static void configureLocalDirs(Configuration conf, String localDir) // JOB_LOCAL_DIR doesn't exist on this host -- Create it. workDir = lDirAlloc.getLocalPathForWrite("work", conf); FileSystem lfs = FileSystem.getLocal(conf).getRaw(); - boolean madeDir = false; + boolean madeDir; try { madeDir = lfs.mkdirs(workDir); } catch (FileAlreadyExistsException e) { @@ -127,8 +129,8 @@ public static void configureLocalDirs(Configuration conf, String localDir) LOG.info("Generating data at path: " + file); // create a file with length entries @SuppressWarnings("deprecation") - SequenceFile.Writer writer = - SequenceFile.createWriter(fs, job, file, + SequenceFile.Writer writer = + SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class); try { Random r = new Random(System.currentTimeMillis()); @@ -144,8 +146,8 @@ public static void configureLocalDirs(Configuration conf, String localDir) writer.close(); } - SequenceFileInputFormat format = - new SequenceFileInputFormat(); + SequenceFileInputFormat format = + new SequenceFileInputFormat<>(); InputSplit[] splits = format.getSplits(job, 1); System.err.println("#split = " + splits.length + " ; " + "#locs = " + splits[0].getLocations().length + "; " + @@ -175,7 +177,7 @@ private static void writeSplitFiles(FileSystem fs, JobConf conf, String[] locations = split.getLocations(); - SplitMetaInfo info = null; + SplitMetaInfo info; info = new JobSplit.SplitMetaInfo(locations, offset, split.getLength()); Path jobSplitMetaInfoFile = new Path( @@ -209,7 +211,7 @@ public static LogicalIOProcessorRuntimeTask createLogicalTask(FileSystem fs, Pat MapProcessor.class.getName()).setUserPayload( TezUtils.createUserPayloadFromConf(jobConf)); - Token shuffleToken = new Token(); + Token shuffleToken = new Token<>(); TaskSpec taskSpec = new TaskSpec( TezTestUtils.getMockTaskAttemptId(0, 0, mapId, 0), @@ -218,18 +220,18 @@ public static LogicalIOProcessorRuntimeTask createLogicalTask(FileSystem fs, Pat inputSpecs, outputSpecs, null, null); - Map serviceConsumerMetadata = new HashMap(); + Map serviceConsumerMetadata = new HashMap<>(); String auxiliaryService = jobConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT); serviceConsumerMetadata.put(auxiliaryService, ShuffleUtils.convertJobTokenToBytes(shuffleToken)); - Map envMap = new HashMap(); + Map envMap = new HashMap<>(); ByteBuffer shufflePortBb = ByteBuffer.allocate(4).putInt(0, 8000); AuxiliaryServiceHelper .setServiceDataIntoEnv(auxiliaryService, shufflePortBb, envMap); - LogicalIOProcessorRuntimeTask task = new LogicalIOProcessorRuntimeTask( + return 
new LogicalIOProcessorRuntimeTask( taskSpec, 0, jobConf, @@ -237,8 +239,7 @@ public static LogicalIOProcessorRuntimeTask createLogicalTask(FileSystem fs, Pat umbilical, serviceConsumerMetadata, envMap, - HashMultimap.create(), null, "", new ExecutionContextImpl("localhost"), + HashMultimap.create(), null, "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), sharedExecutor); - return task; } } diff --git a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/processor/reduce/TestReduceProcessor.java b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/processor/reduce/TestReduceProcessor.java index 17c790341f..0223482da6 100644 --- a/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/processor/reduce/TestReduceProcessor.java +++ b/tez-mapreduce/src/test/java/org/apache/tez/mapreduce/processor/reduce/TestReduceProcessor.java @@ -117,6 +117,7 @@ public void setUpJobConf(JobConf job) { TezTaskOutput.class); job.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, MRPartitioner.class.getName()); job.setNumReduceTasks(1); + job.setInt(MRJobConfig.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); } @Before diff --git a/tez-mapreduce/src/test/resources/META-INF/LICENSE.txt b/tez-mapreduce/src/test/resources/META-INF/LICENSE similarity index 100% rename from tez-mapreduce/src/test/resources/META-INF/LICENSE.txt rename to tez-mapreduce/src/test/resources/META-INF/LICENSE diff --git a/tez-mapreduce/src/test/resources/META-INF/NOTICE b/tez-mapreduce/src/test/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-mapreduce/src/test/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-mapreduce/src/test/resources/META-INF/NOTICE.txt b/tez-mapreduce/src/test/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-mapreduce/src/test/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-plugins/pom.xml b/tez-plugins/pom.xml index e524077b9f..595c123ce5 100644 --- a/tez-plugins/pom.xml +++ b/tez-plugins/pom.xml @@ -21,7 +21,7 @@ org.apache.tez tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-plugins pom @@ -30,11 +30,10 @@ hadoop27 - - !skipATS - + false + tez-protobuf-history-plugin tez-yarn-timeline-history tez-yarn-timeline-history-with-acls tez-history-parser @@ -44,9 +43,10 @@ hadoop28 - false + true + tez-protobuf-history-plugin tez-yarn-timeline-history tez-yarn-timeline-history-with-acls tez-yarn-timeline-cache-plugin diff --git a/tez-plugins/tez-aux-services/findbugs-exclude.xml b/tez-plugins/tez-aux-services/findbugs-exclude.xml index 5b11308f6d..adfd7041a5 100644 --- a/tez-plugins/tez-aux-services/findbugs-exclude.xml +++ b/tez-plugins/tez-aux-services/findbugs-exclude.xml @@ -12,5 +12,10 @@ limitations under the License. See accompanying LICENSE file. 
--> - + + + + + + diff --git a/tez-plugins/tez-aux-services/pom.xml b/tez-plugins/tez-aux-services/pom.xml index c61fefe11f..8b8a7d3431 100644 --- a/tez-plugins/tez-aux-services/pom.xml +++ b/tez-plugins/tez-aux-services/pom.xml @@ -20,7 +20,7 @@ tez-plugins org.apache.tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-aux-services @@ -55,6 +55,12 @@ org.apache.hadoop hadoop-hdfs provided + + + io.netty + * + +
    org.apache.hadoop @@ -106,7 +112,7 @@ io.netty - netty + netty-all com.google.guava @@ -118,7 +124,7 @@ org.mockito - mockito-all + mockito-core test @@ -126,6 +132,12 @@ hadoop-hdfs test test-jar + + + io.netty + * + + org.apache.tez @@ -183,6 +195,7 @@ org.apache.maven.plugins maven-shade-plugin + ${maven-shade-plugin.version} package @@ -194,7 +207,7 @@ true - log4j:log4j + ch.qos.reload4j:reload4j org.slf4j:* @@ -202,6 +215,7 @@ *:* + javax/** META-INF/*.SF META-INF/*.DSA META-INF/*.RSA @@ -225,16 +239,9 @@ org.apache.tez.shaded.$0 - org.jboss.netty + io.netty org.apache.tez.shaded.$0 - - javax - org.apache.tez.shaded.$0 - - javax.crypto.* - - diff --git a/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/FadvisedChunkedFile.java b/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/FadvisedChunkedFile.java index cc3f762f9d..162feb9801 100644 --- a/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/FadvisedChunkedFile.java +++ b/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/FadvisedChunkedFile.java @@ -31,7 +31,9 @@ import static org.apache.hadoop.io.nativeio.NativeIO.POSIX.POSIX_FADV_DONTNEED; -import org.jboss.netty.handler.stream.ChunkedFile; +import io.netty.buffer.ByteBuf; +import io.netty.channel.ChannelHandlerContext; +import io.netty.handler.stream.ChunkedFile; public class FadvisedChunkedFile extends ChunkedFile { @@ -57,13 +59,13 @@ public FadvisedChunkedFile(RandomAccessFile file, long position, long count, } @Override - public Object nextChunk() throws Exception { + public ByteBuf readChunk(ChannelHandlerContext ctx) throws Exception { if (manageOsCache && readaheadPool != null) { readaheadRequest = readaheadPool - .readaheadStream(identifier, fd, getCurrentOffset(), readaheadLength, - getEndOffset(), readaheadRequest); + .readaheadStream(identifier, fd, currentOffset(), readaheadLength, + endOffset(), readaheadRequest); } - return super.nextChunk(); + return super.readChunk(ctx); } @Override @@ -71,11 +73,11 @@ public void close() throws Exception { if (readaheadRequest != null) { readaheadRequest.cancel(); } - if (manageOsCache && getEndOffset() - getStartOffset() > 0) { + if (manageOsCache && endOffset() - startOffset() > 0) { try { NativeIO.POSIX.getCacheManipulator().posixFadviseIfPossible(identifier, fd, - getStartOffset(), getEndOffset() - getStartOffset(), + startOffset(), endOffset() - startOffset(), POSIX_FADV_DONTNEED); } catch (Throwable t) { LOG.warn("Failed to manage OS cache for " + identifier, t); diff --git a/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/FadvisedFileRegion.java b/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/FadvisedFileRegion.java index 40789d806d..23663636e5 100644 --- a/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/FadvisedFileRegion.java +++ b/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/FadvisedFileRegion.java @@ -34,7 +34,7 @@ import static org.apache.hadoop.io.nativeio.NativeIO.POSIX.POSIX_FADV_DONTNEED; -import org.jboss.netty.channel.DefaultFileRegion; +import io.netty.channel.DefaultFileRegion; import com.google.common.annotations.VisibleForTesting; @@ -54,6 +54,7 @@ public class FadvisedFileRegion extends DefaultFileRegion { private final FileChannel fileChannel; private ReadaheadRequest readaheadRequest; + private boolean transferred = false; public FadvisedFileRegion(RandomAccessFile file, long position, long count, boolean manageOsCache, int 
readaheadLength, ReadaheadPool readaheadPool, @@ -77,15 +78,40 @@ public long transferTo(WritableByteChannel target, long position) throws IOException { if (readaheadPool != null && readaheadLength > 0) { readaheadRequest = readaheadPool.readaheadStream(identifier, fd, - getPosition() + position, readaheadLength, - getPosition() + getCount(), readaheadRequest); + position() + position, readaheadLength, + position() + count(), readaheadRequest); } + long written = 0; if(this.shuffleTransferToAllowed) { - return super.transferTo(target, position); + written = super.transferTo(target, position); } else { - return customShuffleTransfer(target, position); + written = customShuffleTransfer(target, position); } + /* + * At this point, we can assume that the transfer was successful. + */ + transferred = true; + return written; + } + + /** + * Since Netty 4, deallocate() is called automatically during cleanup, but before the + * ChannelFutureListeners run. Deallocate calls FileChannel.close() and invalidates the file + * descriptor, so any OS cache operation (e.g. posix_fadvise) with the original file descriptor + * will fail afterwards. Cleanup therefore has to happen here, before deallocating, rather than + * in listeners outside. + */ + @Override + protected void deallocate() { + if (readaheadRequest != null) { + readaheadRequest.cancel(); + } + + if (transferred) { + transferSuccessful(); + } + super.deallocate(); } /** @@ -142,24 +168,19 @@ long customShuffleTransfer(WritableByteChannel target, long position) return actualCount - trans; } - - @Override - public void releaseExternalResources() { - if (readaheadRequest != null) { - readaheadRequest.cancel(); - } - super.releaseExternalResources(); - } - /** * Call when the transfer completes successfully so we can advise the OS that * we don't need the region to be cached anymore.
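* With Netty 4 this can run from deallocate() after the FileChannel has already been closed, * which is why the implementation below only issues posix_fadvise while the file descriptor * is still valid.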
*/ public void transferSuccessful() { - if (manageOsCache && getCount() > 0) { + if (manageOsCache && count() > 0) { try { - NativeIO.POSIX.getCacheManipulator().posixFadviseIfPossible(identifier, - fd, getPosition(), getCount(), POSIX_FADV_DONTNEED); + if (fd.valid()) { + NativeIO.POSIX.getCacheManipulator().posixFadviseIfPossible(identifier, fd, position(), + count(), POSIX_FADV_DONTNEED); + } else { + LOG.debug("File descriptor is not valid anymore, skipping posix_fadvise: " + identifier); + } } catch (Throwable t) { LOG.warn("Failed to manage OS cache for " + identifier, t); } diff --git a/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/IndexCache.java b/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/IndexCache.java index 5a945c422f..54db975292 100644 --- a/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/IndexCache.java +++ b/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/IndexCache.java @@ -19,6 +19,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.tez.runtime.library.common.Constants; import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord; @@ -38,11 +39,22 @@ class IndexCache { private final LinkedBlockingQueue queue = new LinkedBlockingQueue(); + private FileSystem fs; + public static final String INDEX_CACHE_MB = "tez.shuffle.indexcache.mb"; public IndexCache(Configuration conf) { this.conf = conf; - totalMemoryAllowed = 10 * 1024 * 1024; + totalMemoryAllowed = conf.getInt(INDEX_CACHE_MB, 10) * 1024 * 1024; LOG.info("IndexCache created with max memory = " + totalMemoryAllowed); + initLocalFs(); + } + + private void initLocalFs() { + try { + this.fs = FileSystem.getLocal(conf).getRaw(); + } catch (IOException e) { + throw new RuntimeException(e); + } } /** @@ -72,9 +84,7 @@ public TezSpillRecord getSpillRecord(String mapId, Path fileName, String expecte } } } - if (LOG.isDebugEnabled()) { - LOG.debug("IndexCache HIT: MapId " + mapId + " found"); - } + LOG.debug("IndexCache HIT: MapId {} found", mapId); } if (info.mapSpillRecord.size() == 0) { @@ -113,9 +123,7 @@ public TezIndexRecord getIndexInformation(String mapId, int reduce, } } } - if (LOG.isDebugEnabled()) { - LOG.debug("IndexCache HIT: MapId " + mapId + " found"); - } + LOG.debug("IndexCache HIT: MapId {} found", mapId); } if (info.mapSpillRecord.size() == 0 || @@ -149,17 +157,14 @@ private IndexInformation readIndexFileToCache(Path indexFileName, } } } - if (LOG.isDebugEnabled()) { - LOG.debug("IndexCache HIT: MapId " + mapId + " found"); - } + LOG.debug("IndexCache HIT: MapId {} found", mapId); return info; } - if (LOG.isDebugEnabled()) { - LOG.debug("IndexCache MISS: MapId " + mapId + " not found"); - } + LOG.debug("IndexCache MISS: MapId {} not found", mapId); + TezSpillRecord tmp = null; try { - tmp = new TezSpillRecord(indexFileName, conf, expectedIndexOwner); + tmp = new TezSpillRecord(indexFileName, fs, expectedIndexOwner); } catch (Throwable e) { tmp = new TezSpillRecord(0); cache.remove(mapId); diff --git a/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/ShuffleHandler.java b/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/ShuffleHandler.java index a4d1495ba9..ce57978ca1 100644 --- a/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/ShuffleHandler.java +++ 
b/tez-plugins/tez-aux-services/src/main/java/org/apache/tez/auxservices/ShuffleHandler.java @@ -18,19 +18,23 @@ package org.apache.tez.auxservices; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.util.DiskChecker; import static org.fusesource.leveldbjni.JniDBFactory.asString; import static org.fusesource.leveldbjni.JniDBFactory.bytes; -import static org.jboss.netty.buffer.ChannelBuffers.wrappedBuffer; -import static org.jboss.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TYPE; -import static org.jboss.netty.handler.codec.http.HttpMethod.GET; -import static org.jboss.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST; -import static org.jboss.netty.handler.codec.http.HttpResponseStatus.FORBIDDEN; -import static org.jboss.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR; -import static org.jboss.netty.handler.codec.http.HttpResponseStatus.METHOD_NOT_ALLOWED; -import static org.jboss.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND; -import static org.jboss.netty.handler.codec.http.HttpResponseStatus.OK; -import static org.jboss.netty.handler.codec.http.HttpResponseStatus.UNAUTHORIZED; -import static org.jboss.netty.handler.codec.http.HttpVersion.HTTP_1_1; +import static io.netty.buffer.Unpooled.wrappedBuffer; +import static io.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TYPE; +import static io.netty.handler.codec.http.HttpMethod.GET; +import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST; +import static io.netty.handler.codec.http.HttpResponseStatus.FORBIDDEN; +import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR; +import static io.netty.handler.codec.http.HttpResponseStatus.METHOD_NOT_ALLOWED; +import static io.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND; +import static io.netty.handler.codec.http.HttpResponseStatus.OK; +import static io.netty.handler.codec.http.HttpResponseStatus.UNAUTHORIZED; +import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1; +import io.netty.handler.logging.LogLevel; +import io.netty.handler.logging.LoggingHandler; import java.io.File; import java.io.FileNotFoundException; @@ -47,7 +51,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executors; +import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Pattern; @@ -56,7 +60,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.LocalDirAllocator; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DataInputByteBuffer; import org.apache.hadoop.io.DataOutputBuffer; @@ -70,6 +75,7 @@ import org.apache.tez.common.security.JobTokenSecretManager; import org.apache.tez.runtime.library.common.Constants; import org.apache.tez.runtime.library.common.security.SecureShuffleUtils; +import org.apache.tez.runtime.library.common.shuffle.api.ShuffleHandlerError; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; @@ -83,6 +89,7 @@ import org.apache.hadoop.security.token.Token; import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord; import org.apache.hadoop.util.Shell; +import 
org.apache.hadoop.util.DiskChecker.DiskErrorException; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto; @@ -99,46 +106,46 @@ import org.iq80.leveldb.DBException; import org.iq80.leveldb.Logger; import org.iq80.leveldb.Options; -import org.jboss.netty.bootstrap.ServerBootstrap; -import org.jboss.netty.buffer.ChannelBuffers; -import org.jboss.netty.channel.Channel; -import org.jboss.netty.channel.ChannelFactory; -import org.jboss.netty.channel.ChannelFuture; -import org.jboss.netty.channel.ChannelFutureListener; -import org.jboss.netty.channel.ChannelHandler; -import org.jboss.netty.channel.ChannelHandlerContext; -import org.jboss.netty.channel.ChannelPipeline; -import org.jboss.netty.channel.ChannelPipelineFactory; -import org.jboss.netty.channel.ChannelStateEvent; -import org.jboss.netty.channel.Channels; -import org.jboss.netty.channel.ExceptionEvent; -import org.jboss.netty.channel.MessageEvent; -import org.jboss.netty.channel.SimpleChannelUpstreamHandler; -import org.jboss.netty.channel.group.ChannelGroup; -import org.jboss.netty.channel.group.DefaultChannelGroup; -import org.jboss.netty.channel.socket.nio.NioServerBossPool; -import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory; -import org.jboss.netty.channel.socket.nio.NioWorkerPool; -import org.jboss.netty.handler.codec.frame.TooLongFrameException; -import org.jboss.netty.handler.codec.http.DefaultHttpResponse; -import org.jboss.netty.handler.codec.http.HttpChunkAggregator; -import org.jboss.netty.handler.codec.http.HttpHeaders; -import org.jboss.netty.handler.codec.http.HttpRequest; -import org.jboss.netty.handler.codec.http.HttpRequestDecoder; -import org.jboss.netty.handler.codec.http.HttpResponse; -import org.jboss.netty.handler.codec.http.HttpResponseEncoder; -import org.jboss.netty.handler.codec.http.HttpResponseStatus; -import org.jboss.netty.handler.codec.http.QueryStringDecoder; -import org.jboss.netty.handler.ssl.SslHandler; -import org.jboss.netty.handler.stream.ChunkedWriteHandler; -import org.jboss.netty.handler.timeout.IdleState; -import org.jboss.netty.handler.timeout.IdleStateAwareChannelHandler; -import org.jboss.netty.handler.timeout.IdleStateEvent; -import org.jboss.netty.handler.timeout.IdleStateHandler; -import org.jboss.netty.util.CharsetUtil; -import org.jboss.netty.util.HashedWheelTimer; -import org.jboss.netty.util.ThreadNameDeterminer; -import org.jboss.netty.util.Timer; + +import io.netty.bootstrap.ServerBootstrap; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.Channel; +import io.netty.channel.ChannelHandler.Sharable; +import io.netty.channel.ChannelDuplexHandler; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelFutureListener; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelInboundHandlerAdapter; +import io.netty.channel.ChannelInitializer; +import io.netty.channel.ChannelOption; +import io.netty.channel.ChannelPipeline; +import io.netty.channel.group.ChannelGroup; +import io.netty.channel.group.DefaultChannelGroup; +import io.netty.channel.nio.NioEventLoopGroup; +import io.netty.channel.socket.nio.NioServerSocketChannel; +import io.netty.channel.socket.nio.NioSocketChannel; +import io.netty.handler.codec.TooLongFrameException; +import io.netty.handler.codec.http.DefaultFullHttpResponse; +import io.netty.handler.codec.http.DefaultHttpResponse; +import 
io.netty.handler.codec.http.FullHttpResponse; +import io.netty.handler.codec.http.HttpHeaders; +import io.netty.handler.codec.http.HttpObjectAggregator; +import io.netty.handler.codec.http.HttpRequest; +import io.netty.handler.codec.http.HttpRequestDecoder; +import io.netty.handler.codec.http.HttpResponse; +import io.netty.handler.codec.http.HttpResponseEncoder; +import io.netty.handler.codec.http.HttpResponseStatus; +import io.netty.handler.codec.http.LastHttpContent; +import io.netty.handler.codec.http.QueryStringDecoder; +import io.netty.handler.ssl.SslHandler; +import io.netty.handler.stream.ChunkedWriteHandler; +import io.netty.handler.timeout.IdleState; +import io.netty.handler.timeout.IdleStateEvent; +import io.netty.handler.timeout.IdleStateHandler; +import io.netty.util.CharsetUtil; +import io.netty.util.concurrent.GlobalEventExecutor; + import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; @@ -149,7 +156,6 @@ import com.google.common.cache.RemovalListener; import com.google.common.cache.RemovalNotification; import com.google.common.cache.Weigher; -import com.google.protobuf.ByteString; public class ShuffleHandler extends AuxiliaryService { @@ -179,11 +185,15 @@ public class ShuffleHandler extends AuxiliaryService { private static final String INDEX_FILE_NAME = "file.out.index"; private int port; - private ChannelFactory selector; - private final ChannelGroup accepted = new DefaultChannelGroup(); - protected HttpPipelineFactory pipelineFact; + private NioEventLoopGroup bossGroup; + private NioEventLoopGroup workerGroup; + private final ChannelGroup accepted = new DefaultChannelGroup(GlobalEventExecutor.INSTANCE); private int sslFileBufferSize; + // pipeline items + private Shuffle SHUFFLE; + private SSLFactory sslFactory; + /** * Should the shuffle use posix_fadvise calls to manage the OS cache during * sendfile @@ -259,7 +269,6 @@ public class ShuffleHandler extends AuxiliaryService { boolean connectionKeepAliveEnabled = false; private int connectionKeepAliveTimeOut; private int mapOutputMetaInfoCacheSize; - private Timer timer; @Metrics(about="Shuffle output metrics", context="mapred", name="tez") static class ShuffleMetrics implements ChannelFutureListener { @@ -267,7 +276,7 @@ static class ShuffleMetrics implements ChannelFutureListener { MutableCounterLong shuffleOutputBytes; @Metric("# of failed shuffle outputs") MutableCounterInt shuffleOutputsFailed; - @Metric("# of succeeeded shuffle outputs") + @Metric("# of succeeded shuffle outputs") MutableCounterInt shuffleOutputsOK; @Metric("# of current shuffle connections") MutableGaugeInt shuffleConnections; @@ -295,30 +304,37 @@ public ReduceMapFileCount(ReduceContext rc) { @Override public void operationComplete(ChannelFuture future) throws Exception { + Channel ch = future.channel(); if (!future.isSuccess()) { - future.getChannel().close(); + ch.close(); return; } int waitCount = this.reduceContext.getMapsToWait().decrementAndGet(); if (waitCount == 0) { + LOG.debug("Finished with all map outputs"); + /* + * LastHttpContent.EMPTY_LAST_CONTENT can only be written when there are no remaining maps to send, + * this is the only time we can finish the HTTP response. 
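+ * Writing it any earlier would end the chunked HTTP response while further map outputs are still queued on this connection.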
+ */ + ch.writeAndFlush(LastHttpContent.EMPTY_LAST_CONTENT); metrics.operationComplete(future); // Let the idle timer handler close keep-alive connections if (reduceContext.getKeepAlive()) { - ChannelPipeline pipeline = future.getChannel().getPipeline(); + ChannelPipeline pipeline = ch.pipeline(); TimeoutHandler timeoutHandler = (TimeoutHandler) pipeline.get(TIMEOUT_HANDLER); timeoutHandler.setEnabledTimeout(true); } else { - future.getChannel().close(); + ch.close(); } } else { - pipelineFact.getSHUFFLE().sendMap(reduceContext); + SHUFFLE.sendMap(reduceContext); } } } /** - * Maintain parameters per messageReceived() Netty context. + * Maintain parameters per channelRead() Netty context. * Allows sendMapOutput calls from operationComplete() */ private static class ReduceContext { @@ -415,9 +431,11 @@ public ShuffleHandler() { */ public static ByteBuffer serializeMetaData(int port) throws IOException { //TODO these bytes should be versioned - DataOutputBuffer port_dob = new DataOutputBuffer(); - port_dob.writeInt(port); - return ByteBuffer.wrap(port_dob.getData(), 0, port_dob.getLength()); + DataOutputBuffer portDob = new DataOutputBuffer(); + portDob.writeInt(port); + ByteBuffer buf = ByteBuffer.wrap(portDob.getData(), 0, portDob.getLength()); + portDob.close(); + return buf; } /** @@ -430,6 +448,7 @@ public static int deserializeMetaData(ByteBuffer meta) throws IOException { DataInputByteBuffer in = new DataInputByteBuffer(); in.reset(meta); int port = in.readInt(); + in.close(); return port; } @@ -455,6 +474,10 @@ static Token deserializeServiceData(ByteBuffer secret) throw return jt; } + public int getPort() { + return port; + } + @Override public void initializeApplication(ApplicationInitializationContext context) { @@ -512,22 +535,23 @@ protected void serviceInit(Configuration conf) throws Exception { DEFAULT_SHUFFLE_MAX_SESSION_OPEN_FILES); final String BOSS_THREAD_NAME_PREFIX = "Tez Shuffle Handler Boss #"; - NioServerBossPool bossPool = new NioServerBossPool(Executors.newCachedThreadPool(), 1, new ThreadNameDeterminer() { + AtomicInteger bossThreadCounter = new AtomicInteger(0); + bossGroup = new NioEventLoopGroup(1, new ThreadFactory() { @Override - public String determineThreadName(String currentThreadName, String proposedThreadName) throws Exception { - return BOSS_THREAD_NAME_PREFIX + currentThreadName.substring(currentThreadName.lastIndexOf('-') + 1); + public Thread newThread(Runnable r) { + return new Thread(r, BOSS_THREAD_NAME_PREFIX + bossThreadCounter.incrementAndGet()); } }); final String WORKER_THREAD_NAME_PREFIX = "Tez Shuffle Handler Worker #"; - NioWorkerPool workerPool = new NioWorkerPool(Executors.newCachedThreadPool(), maxShuffleThreads, new ThreadNameDeterminer() { + AtomicInteger workerThreadCounter = new AtomicInteger(0); + workerGroup = new NioEventLoopGroup(maxShuffleThreads, new ThreadFactory() { @Override - public String determineThreadName(String currentThreadName, String proposedThreadName) throws Exception { - return WORKER_THREAD_NAME_PREFIX + currentThreadName.substring(currentThreadName.lastIndexOf('-') + 1); + public Thread newThread(Runnable r) { + return new Thread(r, WORKER_THREAD_NAME_PREFIX + workerThreadCounter.incrementAndGet()); } }); - - selector = new NioServerSocketChannelFactory(bossPool, workerPool); + port = conf.getInt(SHUFFLE_PORT_CONFIG_KEY, DEFAULT_SHUFFLE_PORT); super.serviceInit(new YarnConfiguration(conf)); } @@ -538,25 +562,23 @@ protected void serviceStart() throws Exception { userRsrc = new ConcurrentHashMap(); secretManager 
= new JobTokenSecretManager(); recoverState(conf); - ServerBootstrap bootstrap = new ServerBootstrap(selector); - // Timer is shared across entire factory and must be released separately - timer = new HashedWheelTimer(); - try { - pipelineFact = new HttpPipelineFactory(conf, timer); - } catch (Exception ex) { - throw new RuntimeException(ex); - } - bootstrap.setOption("backlog", conf.getInt(SHUFFLE_LISTEN_QUEUE_SIZE, - DEFAULT_SHUFFLE_LISTEN_QUEUE_SIZE)); - bootstrap.setOption("child.keepAlive", true); - bootstrap.setPipelineFactory(pipelineFact); - port = conf.getInt(SHUFFLE_PORT_CONFIG_KEY, DEFAULT_SHUFFLE_PORT); - Channel ch = bootstrap.bind(new InetSocketAddress(port)); + ServerBootstrap bootstrap = new ServerBootstrap() + .channel(NioServerSocketChannel.class) + .group(bossGroup, workerGroup) + .localAddress(port) + .option(ChannelOption.SO_BACKLOG, + conf.getInt(SHUFFLE_LISTEN_QUEUE_SIZE, DEFAULT_SHUFFLE_LISTEN_QUEUE_SIZE)) + .childOption(ChannelOption.SO_KEEPALIVE, true); + initPipeline(bootstrap, conf); + Channel ch = bootstrap.bind().sync().channel(); accepted.add(ch); - port = ((InetSocketAddress)ch.getLocalAddress()).getPort(); + + // setup port + port = ((InetSocketAddress)ch.localAddress()).getPort(); conf.set(SHUFFLE_PORT_CONFIG_KEY, Integer.toString(port)); - pipelineFact.SHUFFLE.setPort(port); + SHUFFLE.setPort(port); LOG.info(getName() + " listening on port " + port); + super.serviceStart(); sslFileBufferSize = conf.getInt(SUFFLE_SSL_FILE_BUFFER_SIZE_KEY, @@ -572,20 +594,53 @@ protected void serviceStart() throws Exception { DEFAULT_SHUFFLE_MAPOUTPUT_META_INFO_CACHE_SIZE)); } + private void initPipeline(ServerBootstrap bootstrap, Configuration conf) throws Exception { + SHUFFLE = getShuffle(conf); + if (conf.getBoolean(SHUFFLE_SSL_ENABLED_KEY, SHUFFLE_SSL_ENABLED_DEFAULT)) { + LOG.info("Encrypted shuffle is enabled."); + sslFactory = new SSLFactory(SSLFactory.Mode.SERVER, conf); + sslFactory.init(); + } + + ChannelInitializer channelInitializer = + new ChannelInitializer() { + @Override + public void initChannel(NioSocketChannel ch) throws Exception { + ChannelPipeline pipeline = ch.pipeline(); + if (sslFactory != null) { + pipeline.addLast("ssl", new SslHandler(sslFactory.createSSLEngine())); + } + if (LOG.isDebugEnabled()) { + pipeline.addLast("loggingHandler", new LoggingHandler(LogLevel.DEBUG)); + } + pipeline.addLast("decoder", new HttpRequestDecoder()); + pipeline.addLast("aggregator", new HttpObjectAggregator(1 << 16)); + pipeline.addLast("encoder", new HttpResponseEncoder()); + pipeline.addLast("chunking", new ChunkedWriteHandler()); + pipeline.addLast("shuffle", SHUFFLE); + pipeline.addLast("idle", new IdleStateHandler(0, connectionKeepAliveTimeOut, 0)); + pipeline.addLast(TIMEOUT_HANDLER, new TimeoutHandler()); + } + }; + bootstrap.childHandler(channelInitializer); + } + + private void destroyPipeline() { + if (sslFactory != null) { + sslFactory.destroy(); + } + } + @Override protected void serviceStop() throws Exception { accepted.close().awaitUninterruptibly(10, TimeUnit.SECONDS); - if (selector != null) { - ServerBootstrap bootstrap = new ServerBootstrap(selector); - bootstrap.releaseExternalResources(); + if (bossGroup != null) { + bossGroup.shutdownGracefully(); } - if (pipelineFact != null) { - pipelineFact.destroy(); - } - if (timer != null) { - // Release this shared timer resource - timer.stop(); + if (workerGroup != null) { + workerGroup.shutdownGracefully(); } + destroyPipeline(); if (stateDb != null) { stateDb.close(); } @@ -607,6 +662,10 @@ protected 
Shuffle getShuffle(Configuration conf) { return new Shuffle(conf); } + protected JobTokenSecretManager getSecretManager() { + return secretManager; + } + private void recoverState(Configuration conf) throws IOException { Path recoveryRoot = getRecoveryPath(); if (recoveryRoot != null) { @@ -726,7 +785,7 @@ private void checkVersion() throws IOException { private void addJobToken(JobID jobId, String user, Token jobToken) { userRsrc.put(jobId.toString(), user); - secretManager.addTokenForJob(jobId.toString(), jobToken); + getSecretManager().addTokenForJob(jobId.toString(), jobToken); LOG.info("Added token for " + jobId.toString()); } @@ -752,9 +811,10 @@ private void recoverJobShuffleInfo(String jobIdStr, byte[] data) private void recordJobShuffleInfo(JobID jobId, String user, Token jobToken) throws IOException { if (stateDb != null) { + // Discover type instead of assuming ByteString to allow for shading. TokenProto tokenProto = TokenProto.newBuilder() - .setIdentifier(ByteString.copyFrom(jobToken.getIdentifier())) - .setPassword(ByteString.copyFrom(jobToken.getPassword())) + .setIdentifier(TokenProto.getDefaultInstance().getIdentifier().copyFrom(jobToken.getIdentifier())) + .setPassword(TokenProto.getDefaultInstance().getPassword().copyFrom(jobToken.getPassword())) .setKind(jobToken.getKind().toString()) .setService(jobToken.getService().toString()) .build(); @@ -771,7 +831,7 @@ private void recordJobShuffleInfo(JobID jobId, String user, private void removeJobShuffleInfo(JobID jobId) throws IOException { String jobIdStr = jobId.toString(); - secretManager.removeTokenForJob(jobIdStr); + getSecretManager().removeTokenForJob(jobIdStr); userRsrc.remove(jobIdStr); if (stateDb != null) { try { @@ -792,7 +852,7 @@ public void log(String message) { } } - static class TimeoutHandler extends IdleStateAwareChannelHandler { + static class TimeoutHandler extends ChannelDuplexHandler { private boolean enabledTimeout; @@ -801,59 +861,14 @@ void setEnabledTimeout(boolean enabledTimeout) { } @Override - public void channelIdle(ChannelHandlerContext ctx, IdleStateEvent e) { - if (e.getState() == IdleState.WRITER_IDLE && enabledTimeout) { - e.getChannel().close(); - } - } - } - - class HttpPipelineFactory implements ChannelPipelineFactory { - - final Shuffle SHUFFLE; - private SSLFactory sslFactory; - private final ChannelHandler idleStateHandler; - - public HttpPipelineFactory(Configuration conf, Timer timer) throws Exception { - SHUFFLE = getShuffle(conf); - if (conf.getBoolean(SHUFFLE_SSL_ENABLED_KEY, - SHUFFLE_SSL_ENABLED_DEFAULT)) { - LOG.info("Encrypted shuffle is enabled."); - sslFactory = new SSLFactory(SSLFactory.Mode.SERVER, conf); - sslFactory.init(); - } - this.idleStateHandler = new IdleStateHandler(timer, 0, connectionKeepAliveTimeOut, 0); - } - - public Shuffle getSHUFFLE() { - return SHUFFLE; - } - - public void destroy() { - if (sslFactory != null) { - sslFactory.destroy(); + public void userEventTriggered(ChannelHandlerContext ctx, Object evt) throws Exception { + if (evt instanceof IdleStateEvent) { + IdleStateEvent e = (IdleStateEvent) evt; + if (e.state() == IdleState.WRITER_IDLE && enabledTimeout) { + ctx.channel().close(); + } } } - - @Override - public ChannelPipeline getPipeline() throws Exception { - ChannelPipeline pipeline = Channels.pipeline(); - if (sslFactory != null) { - pipeline.addLast("ssl", new SslHandler(sslFactory.createSSLEngine())); - } - pipeline.addLast("decoder", new HttpRequestDecoder()); - pipeline.addLast("aggregator", new HttpChunkAggregator(1 << 16)); - 
pipeline.addLast("encoder", new HttpResponseEncoder()); - pipeline.addLast("chunking", new ChunkedWriteHandler()); - pipeline.addLast("shuffle", SHUFFLE); - pipeline.addLast("idle", idleStateHandler); - pipeline.addLast(TIMEOUT_HANDLER, new TimeoutHandler()); - return pipeline; - // TODO factor security manager into pipeline - // TODO factor out encode/decode to permit binary shuffle - // TODO factor out decode of index to permit alt. models - } - } protected static class Range { @@ -879,15 +894,14 @@ public String toString() { } } - class Shuffle extends SimpleChannelUpstreamHandler { + @Sharable + class Shuffle extends ChannelInboundHandlerAdapter { private static final int MAX_WEIGHT = 10 * 1024 * 1024; private static final int EXPIRE_AFTER_ACCESS_MINUTES = 5; private static final int ALLOWED_CONCURRENCY = 16; private final Configuration conf; private final IndexCache indexCache; - private final LocalDirAllocator lDirAlloc = - new LocalDirAllocator(YarnConfiguration.NM_LOCAL_DIRS); private int port; private final LoadingCache pathCache = CacheBuilder.newBuilder().expireAfterAccess(EXPIRE_AFTER_ACCESS_MINUTES, @@ -920,14 +934,12 @@ public AttemptPathInfo load(AttemptPathIdentifier key) throws Exception { String base = getBaseLocation(key.jobId, key.dagId, key.user); String attemptBase = base + key.attemptId; - Path indexFileName = lDirAlloc.getLocalPathToRead( - attemptBase + Path.SEPARATOR + INDEX_FILE_NAME, conf); - Path mapOutputFileName = lDirAlloc.getLocalPathToRead( - attemptBase + Path.SEPARATOR + DATA_FILE_NAME, conf); + Path indexFileName = getAuxiliaryLocalPathHandler() + .getLocalPathForRead(attemptBase + "/" + INDEX_FILE_NAME); + Path mapOutputFileName = getAuxiliaryLocalPathHandler() + .getLocalPathForRead(attemptBase + "/" + DATA_FILE_NAME); - if (LOG.isDebugEnabled()) { - LOG.debug("Loaded : " + key + " via loader"); - } + LOG.debug("Loaded : {} via loader", key); return new AttemptPathInfo(indexFileName, mapOutputFileName); } }); @@ -967,51 +979,56 @@ private Range splitReduces(List reduceq) { } @Override - public void channelOpen(ChannelHandlerContext ctx, ChannelStateEvent evt) + public void channelActive(ChannelHandlerContext ctx) throws Exception { if ((maxShuffleConnections > 0) && (accepted.size() >= maxShuffleConnections)) { LOG.info(String.format("Current number of shuffle connections (%d) is " + "greater than or equal to the max allowed shuffle connections (%d)", accepted.size(), maxShuffleConnections)); - evt.getChannel().close(); + ctx.channel().close(); return; } - accepted.add(evt.getChannel()); - super.channelOpen(ctx, evt); + accepted.add(ctx.channel()); + super.channelActive(ctx); } @Override - public void messageReceived(ChannelHandlerContext ctx, MessageEvent evt) + public void channelRead(ChannelHandlerContext ctx, Object message) throws Exception { - HttpRequest request = (HttpRequest) evt.getMessage(); + HttpRequest request = (HttpRequest) message; + handleRequest(ctx, request); + } + + private void handleRequest(ChannelHandlerContext ctx, HttpRequest request) + throws IOException, Exception { if (request.getMethod() != GET) { sendError(ctx, METHOD_NOT_ALLOWED); return; } // Check whether the shuffle version is compatible if (!ShuffleHeader.DEFAULT_HTTP_HEADER_NAME.equals( - request.getHeader(ShuffleHeader.HTTP_HEADER_NAME)) + request.headers().get(ShuffleHeader.HTTP_HEADER_NAME)) || !ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION.equals( - request.getHeader(ShuffleHeader.HTTP_HEADER_VERSION))) { + request.headers().get(ShuffleHeader.HTTP_HEADER_VERSION))) 
{ sendError(ctx, "Incompatible shuffle request version", BAD_REQUEST); + return; } - final Map> q = - new QueryStringDecoder(request.getUri()).getParameters(); + final Map> q = new QueryStringDecoder(request.getUri()).parameters(); final List keepAliveList = q.get("keepAlive"); final List dagCompletedQ = q.get("dagAction"); + final List vertexCompletedQ = q.get("vertexAction"); + final List taskAttemptFailedQ = q.get("taskAttemptAction"); boolean keepAliveParam = false; if (keepAliveList != null && keepAliveList.size() == 1) { keepAliveParam = Boolean.parseBoolean(keepAliveList.get(0)); - if (LOG.isDebugEnabled()) { - LOG.debug("KeepAliveParam : " + keepAliveList - + " : " + keepAliveParam); - } + LOG.debug("KeepAliveParam : {} : {}", keepAliveList, keepAliveParam); } final List mapIds = splitMaps(q.get("map")); final Range reduceRange = splitReduces(q.get("reduce")); final List jobQ = q.get("job"); final List dagIdQ = q.get("dag"); + final List vertexIdQ = q.get("vertex"); if (LOG.isDebugEnabled()) { LOG.debug("RECV: " + request.getUri() + "\n mapId: " + mapIds + @@ -1021,7 +1038,13 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent evt) "\n keepAlive: " + keepAliveParam); } // If the request is for Dag Deletion, process the request and send OK. - if (deleteDagDirectories(evt, dagCompletedQ, jobQ, dagIdQ)) { + if (deleteDagDirectories(ctx.channel(), dagCompletedQ, jobQ, dagIdQ)) { + return; + } + if (deleteVertexDirectories(ctx.channel(), vertexCompletedQ, jobQ, dagIdQ, vertexIdQ)) { + return; + } + if (deleteTaskAttemptDirectories(ctx.channel(), taskAttemptFailedQ, jobQ, dagIdQ, mapIds)) { return; } if (mapIds == null || reduceRange == null || jobQ == null || dagIdQ == null) { @@ -1070,8 +1093,8 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent evt) Map mapOutputInfoMap = new HashMap(); - Channel ch = evt.getChannel(); - ChannelPipeline pipeline = ch.getPipeline(); + Channel ch = ctx.channel(); + ChannelPipeline pipeline = ch.pipeline(); TimeoutHandler timeoutHandler = (TimeoutHandler)pipeline.get(TIMEOUT_HANDLER); timeoutHandler.setEnabledTimeout(false); String user = userRsrc.get(jobId); @@ -1079,15 +1102,23 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent evt) try { populateHeaders(mapIds, jobId, dagId, user, reduceRange, response, keepAliveParam, mapOutputInfoMap); - } catch(IOException e) { + } catch (DiskErrorException e) { // fatal error: fetcher should be aware of that + LOG.error("Shuffle error in populating headers (fatal: DiskErrorException):", e); + String errorMessage = getErrorMessage(e); + // custom message, might be noticed by fetchers + // it should reuse the current response object, as headers have been already set for it + sendFakeShuffleHeaderWithError(ctx, + ShuffleHandlerError.DISK_ERROR_EXCEPTION + ": " + errorMessage, response); + return; + } catch (IOException e) { ch.write(response); LOG.error("Shuffle error in populating headers :", e); String errorMessage = getErrorMessage(e); - sendError(ctx,errorMessage , INTERNAL_SERVER_ERROR); + sendError(ctx, errorMessage, INTERNAL_SERVER_ERROR); return; } ch.write(response); - //Initialize one ReduceContext object per messageReceived call + //Initialize one ReduceContext object per channelRead call boolean keepAlive = keepAliveParam || connectionKeepAliveEnabled; ReduceContext reduceContext = new ReduceContext(mapIds, reduceRange, ctx, user, mapOutputInfoMap, jobId, dagId, keepAlive); @@ -1099,25 +1130,88 @@ public void messageReceived(ChannelHandlerContext 
ctx, MessageEvent evt) } } - private boolean deleteDagDirectories(MessageEvent evt, + private boolean isNullOrEmpty(List entries) { + return entries == null || entries.isEmpty(); + } + + private boolean notEmptyAndContains(List entries, String key) { + if (entries == null || entries.isEmpty()) { + return false; + } + return entries.get(0).contains(key); + } + + private boolean deleteDagDirectories(Channel channel, List dagCompletedQ, List jobQ, List dagIdQ) { if (jobQ == null || jobQ.isEmpty()) { return false; } - if (dagCompletedQ != null && !dagCompletedQ.isEmpty() && dagCompletedQ.get(0).contains("delete") - && dagIdQ != null && !dagIdQ.isEmpty()) { + if (notEmptyAndContains(dagCompletedQ,"delete") && !isNullOrEmpty(dagIdQ)) { String base = getDagLocation(jobQ.get(0), dagIdQ.get(0), userRsrc.get(jobQ.get(0))); try { FileContext lfc = FileContext.getLocalFSFileContext(); - for(Path dagPath : lDirAlloc.getAllLocalPathsToRead(base, conf)) { + for(Path dagPath : getAuxiliaryLocalPathHandler().getAllLocalPathsForRead(base)) { lfc.delete(dagPath, true); } } catch (IOException e) { LOG.warn("Encountered exception during dag delete "+ e); } - evt.getChannel().write(new DefaultHttpResponse(HTTP_1_1, OK)); - evt.getChannel().close(); + channel.writeAndFlush(new DefaultHttpResponse(HTTP_1_1, OK)) + .addListener(ChannelFutureListener.CLOSE); + return true; + } + return false; + } + + private boolean deleteVertexDirectories(Channel channel, List vertexCompletedQ, + List jobQ, List dagIdQ, + List vertexIdQ) { + if (jobQ == null || jobQ.isEmpty()) { + return false; + } + if (notEmptyAndContains(vertexCompletedQ, "delete") && !isNullOrEmpty(vertexIdQ)) { + try { + deleteTaskDirsOfVertex(jobQ.get(0), dagIdQ.get(0), vertexIdQ.get(0), userRsrc.get(jobQ.get(0))); + } catch (IOException e) { + LOG.warn("Encountered exception during vertex delete " + e); + } + channel.writeAndFlush(new DefaultHttpResponse(HTTP_1_1, OK)) + .addListener(ChannelFutureListener.CLOSE); + return true; + } + return false; + } + + private boolean deleteTaskAttemptDirectories(Channel channel, List taskAttemptFailedQ, + List jobQ, List dagIdQ, List taskAttemptIdQ) { + if (jobQ == null || jobQ.isEmpty()) { + return false; + } + if (notEmptyAndContains(taskAttemptFailedQ,"delete") && !isNullOrEmpty(taskAttemptIdQ)) { + for (String taskAttemptId : taskAttemptIdQ) { + String baseStr = getBaseLocation(jobQ.get(0), dagIdQ.get(0), userRsrc.get(jobQ.get(0))); + try { + FileSystem fs = FileSystem.getLocal(conf).getRaw(); + for (Path basePath : getAuxiliaryLocalPathHandler().getAllLocalPathsForRead(baseStr)) { + for (FileStatus fileStatus : fs.listStatus(basePath)) { + Path taskAttemptPath = fileStatus.getPath(); + if (taskAttemptPath.getName().startsWith(taskAttemptId)) { + if (fs.delete(taskAttemptPath, true)) { + LOG.info("Deleted directory : " + taskAttemptPath); + // remove entry from IndexCache + indexCache.removeMap(taskAttemptPath.getName()); + break; + } + } + } + } + } catch (IOException e) { + LOG.warn("Encountered exception during failed task attempt delete " + e); + } + } + channel.writeAndFlush(new DefaultHttpResponse(HTTP_1_1, OK)) + .addListener(ChannelFutureListener.CLOSE); return true; } return false; @@ -1125,7 +1219,7 @@ private boolean deleteDagDirectories(MessageEvent evt, /** * Calls sendMapOutput for the mapId pointed by ReduceContext.mapsToSend - * and increments it. This method is first called by messageReceived() + * and increments it. 
This method is first called by channelRead() * maxSessionOpenFiles times and then on the completion of every * sendMapOutput operation. This limits the number of open files on a node, * which can get really large(exhausting file descriptors on the NM) if all @@ -1135,7 +1229,6 @@ private boolean deleteDagDirectories(MessageEvent evt, */ public ChannelFuture sendMap(ReduceContext reduceContext) throws Exception { - ChannelFuture nextMap = null; if (reduceContext.getMapsToSend().get() < reduceContext.getMapIds().size()) { @@ -1144,14 +1237,16 @@ public ChannelFuture sendMap(ReduceContext reduceContext) try { MapOutputInfo info = reduceContext.getInfoMap().get(mapId); + if (info == null) { - info = getMapOutputInfo(reduceContext.dagId, mapId, + info = getMapOutputInfo(reduceContext.dagId, mapId, reduceContext.getReduceRange(), reduceContext.getJobId(), reduceContext.getUser()); } + nextMap = sendMapOutput( reduceContext.getCtx(), - reduceContext.getCtx().getChannel(), + reduceContext.getCtx().channel(), reduceContext.getUser(), mapId, reduceContext.getReduceRange(), info); if (null == nextMap) { @@ -1160,7 +1255,11 @@ public ChannelFuture sendMap(ReduceContext reduceContext) } nextMap.addListener(new ReduceMapFileCount(reduceContext)); } catch (IOException e) { - LOG.error("Shuffle error :", e); + if (e instanceof DiskChecker.DiskErrorException) { + LOG.error("Shuffle error :" + e); + } else { + LOG.error("Shuffle error :", e); + } String errorMessage = getErrorMessage(e); sendError(reduceContext.getCtx(), errorMessage, INTERNAL_SERVER_ERROR); @@ -1185,6 +1284,29 @@ private String getBaseLocation(String jobId, String dagId, String user) { return baseStr; } + /** + * Delete shuffle data in task directories belonging to a vertex. + */ + private void deleteTaskDirsOfVertex(String jobId, String dagId, String vertexId, String user) throws IOException { + String baseStr = getBaseLocation(jobId, dagId, user); + FileContext lfc = FileContext.getLocalFSFileContext(); + for(Path dagPath : getAuxiliaryLocalPathHandler().getAllLocalPathsForRead(baseStr)) { + RemoteIterator status = lfc.listStatus(dagPath); + final JobID jobID = JobID.forName(jobId); + String taskDirPrefix = String.format("attempt%s_%s_%s_", + jobID.toString().replace("job", ""), dagId, vertexId); + while (status.hasNext()) { + FileStatus fileStatus = status.next(); + Path attemptPath = fileStatus.getPath(); + if (attemptPath.getName().startsWith(taskDirPrefix)) { + if(lfc.delete(attemptPath, true)) { + LOG.debug("deleted shuffle data in task directory: {}", attemptPath); + } + } + } + } + } + private String getDagLocation(String jobId, String dagId, String user) { final JobID jobID = JobID.forName(jobId); final ApplicationId appID = @@ -1199,18 +1321,15 @@ private String getDagLocation(String jobId, String dagId, String user) { } protected MapOutputInfo getMapOutputInfo(String dagId, String mapId, - String jobId, + Range reduceRange, String jobId, String user) throws IOException { AttemptPathInfo pathInfo; try { AttemptPathIdentifier identifier = new AttemptPathIdentifier( jobId, dagId, user, mapId); pathInfo = pathCache.get(identifier); - if (LOG.isDebugEnabled()) { - LOG.debug("Retrieved pathInfo for " + identifier + - " check for corresponding loaded messages to determine whether" + - " it was loaded or cached"); - } + LOG.debug("Retrieved pathInfo for {} check for corresponding loaded " + + "messages to determine whether it was loaded or cached", identifier); } catch (ExecutionException e) { if (e.getCause() instanceof IOException) { 
throw (IOException) e.getCause(); @@ -1228,8 +1347,13 @@ protected MapOutputInfo getMapOutputInfo(String dagId, String mapId, pathInfo.indexPath); } + MapOutputInfo outputInfo; + if (reduceRange.first == reduceRange.last) { + outputInfo = new MapOutputInfo(pathInfo.dataPath, spillRecord.getIndex(reduceRange.first), reduceRange); + } else { - MapOutputInfo outputInfo = new MapOutputInfo(pathInfo.dataPath, spillRecord); + outputInfo = new MapOutputInfo(pathInfo.dataPath, spillRecord, reduceRange); + } return outputInfo; } @@ -1257,12 +1381,12 @@ long getContentLength(List mapIds, String jobId, String dagId, String us int reduceCountVSize = WritableUtils.getVIntSize(reduceRange.getLast() - reduceRange.getFirst() + 1); for (String mapId : mapIds) { contentLength += reduceCountVSize; - MapOutputInfo outputInfo = getMapOutputInfo(dagId, mapId, jobId, user); + MapOutputInfo outputInfo = getMapOutputInfo(dagId, mapId, reduceRange, jobId, user); if (mapOutputInfoMap.size() < mapOutputMetaInfoCacheSize) { mapOutputInfoMap.put(mapId, outputInfo); } for (int reduce = reduceRange.getFirst(); reduce <= reduceRange.getLast(); reduce++) { - TezIndexRecord indexRecord = outputInfo.spillRecord.getIndex(reduce); + TezIndexRecord indexRecord = outputInfo.getIndex(reduce); ShuffleHeader header = new ShuffleHeader(mapId, indexRecord.getPartLength(), indexRecord.getRawLength(), reduce); @@ -1275,34 +1399,55 @@ long getContentLength(List mapIds, String jobId, String dagId, String us protected void setResponseHeaders(HttpResponse response, boolean keepAliveParam, long contentLength) { if (connectionKeepAliveEnabled || keepAliveParam) { - response.setHeader(HttpHeaders.Names.CONTENT_LENGTH, String.valueOf(contentLength)); - response.setHeader(HttpHeaders.Names.CONNECTION, HttpHeaders.Values.KEEP_ALIVE); - response.setHeader(HttpHeaders.Values.KEEP_ALIVE, "timeout=" + connectionKeepAliveTimeOut); - if (LOG.isDebugEnabled()) { - LOG.debug("Content Length in shuffle : " + contentLength); - } + response.headers().set(HttpHeaders.Names.CONTENT_LENGTH, String.valueOf(contentLength)); + response.headers().set(HttpHeaders.Names.CONNECTION, HttpHeaders.Values.KEEP_ALIVE); + response.headers().set(HttpHeaders.Values.KEEP_ALIVE, "timeout=" + connectionKeepAliveTimeOut); + LOG.debug("Content Length in shuffle : {}", contentLength); } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Setting connection close header..."); - } - response.setHeader(HttpHeaders.Names.CONNECTION, CONNECTION_CLOSE); + LOG.debug("Setting connection close header..."); + response.headers().set(HttpHeaders.Names.CONNECTION, CONNECTION_CLOSE); } } class MapOutputInfo { - final Path mapOutputFileName; - final TezSpillRecord spillRecord; + private final Path mapOutputFileName; + private TezSpillRecord spillRecord; + private TezIndexRecord indexRecord; + private final Range reduceRange; + + MapOutputInfo(Path mapOutputFileName, TezIndexRecord indexRecord, Range reduceRange) { + this.mapOutputFileName = mapOutputFileName; + this.indexRecord = indexRecord; + this.reduceRange = reduceRange; + } - MapOutputInfo(Path mapOutputFileName, TezSpillRecord spillRecord) { + MapOutputInfo(Path mapOutputFileName, TezSpillRecord spillRecord, Range reduceRange) { this.mapOutputFileName = mapOutputFileName; this.spillRecord = spillRecord; + this.reduceRange = reduceRange; + } + + TezIndexRecord getIndex(int index) { + if (index < reduceRange.first || index > reduceRange.last) { + throw new IllegalArgumentException("Reduce Index: " + index + " out of range for " + 
mapOutputFileName); + } + if (spillRecord != null) { + return spillRecord.getIndex(index); + } else { + return indexRecord; + } + } + + public void finish() { + spillRecord = null; + indexRecord = null; } } protected void verifyRequest(String appid, ChannelHandlerContext ctx, HttpRequest request, HttpResponse response, URL requestUri) throws IOException { - SecretKey tokenSecret = secretManager.retrieveTokenSecret(appid); + SecretKey tokenSecret = getSecretManager().retrieveTokenSecret(appid); if (null == tokenSecret) { LOG.info("Request for unknown token " + appid); throw new IOException("could not find jobid"); @@ -1311,7 +1456,7 @@ protected void verifyRequest(String appid, ChannelHandlerContext ctx, String enc_str = SecureShuffleUtils.buildMsgFrom(requestUri); // hash from the fetcher String urlHashStr = - request.getHeader(SecureShuffleUtils.HTTP_HEADER_URL_HASH); + request.headers().get(SecureShuffleUtils.HTTP_HEADER_URL_HASH); if (urlHashStr == null) { LOG.info("Missing header hash for " + appid); throw new IOException("fetcher cannot be authenticated"); @@ -1327,11 +1472,11 @@ protected void verifyRequest(String appid, ChannelHandlerContext ctx, String reply = SecureShuffleUtils.generateHash(urlHashStr.getBytes(Charsets.UTF_8), tokenSecret); - response.setHeader(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH, reply); + response.headers().set(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH, reply); // Put shuffle version into http header - response.setHeader(ShuffleHeader.HTTP_HEADER_NAME, + response.headers().set(ShuffleHeader.HTTP_HEADER_NAME, ShuffleHeader.DEFAULT_HTTP_HEADER_NAME); - response.setHeader(ShuffleHeader.HTTP_HEADER_VERSION, + response.headers().set(ShuffleHeader.HTTP_HEADER_VERSION, ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION); if (LOG.isDebugEnabled()) { int len = reply.length(); @@ -1349,9 +1494,9 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch, DataOutputBuffer dobRange = new DataOutputBuffer(); // Indicate how many record to be written WritableUtils.writeVInt(dobRange, reduceRange.getLast() - reduceRange.getFirst() + 1); - ch.write(wrappedBuffer(dobRange.getData(), 0, dobRange.getLength())); + ch.writeAndFlush(wrappedBuffer(dobRange.getData(), 0, dobRange.getLength())); for (int reduce = reduceRange.getFirst(); reduce <= reduceRange.getLast(); reduce++) { - TezIndexRecord index = outputInfo.spillRecord.getIndex(reduce); + TezIndexRecord index = outputInfo.getIndex(reduce); // Records are only valid if they have a non-zero part length if (index.getPartLength() != 0) { if (firstIndex == null) { @@ -1363,8 +1508,10 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch, ShuffleHeader header = new ShuffleHeader(mapId, index.getPartLength(), index.getRawLength(), reduce); DataOutputBuffer dob = new DataOutputBuffer(); header.write(dob); - ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength())); + // Free the memory needed to store the spill and index records + ch.writeAndFlush(wrappedBuffer(dob.getData(), 0, dob.getLength())); } + outputInfo.finish(); final long rangeOffset = firstIndex.getStartOffset(); final long rangePartLength = lastIndex.getStartOffset() + lastIndex.getPartLength() - firstIndex.getStartOffset(); @@ -1377,62 +1524,73 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch, return null; } ChannelFuture writeFuture; - if (ch.getPipeline().get(SslHandler.class) == null) { + if (ch.pipeline().get(SslHandler.class) == null) { final FadvisedFileRegion partition = new 
FadvisedFileRegion(spill, rangeOffset, rangePartLength, manageOsCache, readaheadLength, readaheadPool, spillFile.getAbsolutePath(), shuffleBufferSize, shuffleTransferToAllowed); - writeFuture = ch.write(partition); - writeFuture.addListener(new ChannelFutureListener() { - // TODO error handling; distinguish IO/connection failures, - // attribute to appropriate spill output - @Override - public void operationComplete(ChannelFuture future) { - if (future.isSuccess()) { - partition.transferSuccessful(); - } - partition.releaseExternalResources(); - } - }); + writeFuture = ch.writeAndFlush(partition); } else { // HTTPS cannot be done with zero copy. final FadvisedChunkedFile chunk = new FadvisedChunkedFile(spill, rangeOffset, rangePartLength, sslFileBufferSize, manageOsCache, readaheadLength, readaheadPool, spillFile.getAbsolutePath()); - writeFuture = ch.write(chunk); + writeFuture = ch.writeAndFlush(chunk); } metrics.shuffleConnections.incr(); metrics.shuffleOutputBytes.incr(rangePartLength); // optimistic return writeFuture; } - protected void sendError(ChannelHandlerContext ctx, - HttpResponseStatus status) { + protected void sendError(ChannelHandlerContext ctx, HttpResponseStatus status) { sendError(ctx, "", status); } - protected void sendError(ChannelHandlerContext ctx, String message, - HttpResponseStatus status) { - HttpResponse response = new DefaultHttpResponse(HTTP_1_1, status); - response.setHeader(CONTENT_TYPE, "text/plain; charset=UTF-8"); + protected void sendError(ChannelHandlerContext ctx, String message, HttpResponseStatus status) { + FullHttpResponse response = new DefaultFullHttpResponse(HTTP_1_1, status); + sendError(ctx, message, response); + } + + protected void sendError(ChannelHandlerContext ctx, String message, FullHttpResponse response) { + sendError(ctx, Unpooled.copiedBuffer(message, CharsetUtil.UTF_8), response); + } + + private void sendFakeShuffleHeaderWithError(ChannelHandlerContext ctx, String message, + HttpResponse response) throws IOException { + FullHttpResponse fullResponse = + new DefaultFullHttpResponse(response.getProtocolVersion(), response.getStatus()); + fullResponse.headers().set(response.headers()); + + ShuffleHeader header = new ShuffleHeader(message, -1, -1, -1); + DataOutputBuffer out = new DataOutputBuffer(); + header.write(out); + + sendError(ctx, wrappedBuffer(out.getData(), 0, out.getLength()), fullResponse); + } + + protected void sendError(ChannelHandlerContext ctx, ByteBuf content, + FullHttpResponse response) { + response.headers().set(CONTENT_TYPE, "text/plain; charset=UTF-8"); // Put shuffle version into http header - response.setHeader(ShuffleHeader.HTTP_HEADER_NAME, + response.headers().set(ShuffleHeader.HTTP_HEADER_NAME, ShuffleHeader.DEFAULT_HTTP_HEADER_NAME); - response.setHeader(ShuffleHeader.HTTP_HEADER_VERSION, + response.headers().set(ShuffleHeader.HTTP_HEADER_VERSION, ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION); - response.setContent( - ChannelBuffers.copiedBuffer(message, CharsetUtil.UTF_8)); + response.content().writeBytes(content); // Close the connection as soon as the error message is sent. - ctx.getChannel().write(response).addListener(ChannelFutureListener.CLOSE); + ctx.channel().writeAndFlush(response).addListener(ChannelFutureListener.CLOSE); + /* + * The general rule of thumb is that the party that accesses a reference-counted object last + * is also responsible for the destruction of that reference-counted object. 
+ */ + content.release(); } @Override - public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { - Channel ch = e.getChannel(); - Throwable cause = e.getCause(); if (cause instanceof TooLongFrameException) { sendError(ctx, BAD_REQUEST); return; @@ -1449,8 +1607,8 @@ public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) } LOG.error("Shuffle error: ", cause); - if (ch.isConnected()) { - LOG.error("Shuffle error " + e); + if (ctx.channel().isActive()) { + LOG.error("Shuffle error", cause); sendError(ctx, INTERNAL_SERVER_ERROR); } } diff --git a/tez-plugins/tez-aux-services/src/test/java/org/apache/tez/auxservices/TestIndexCache.java b/tez-plugins/tez-aux-services/src/test/java/org/apache/tez/auxservices/TestIndexCache.java new file mode 100644 index 0000000000..802fdd6f60 --- /dev/null +++ b/tez-plugins/tez-aux-services/src/test/java/org/apache/tez/auxservices/TestIndexCache.java @@ -0,0 +1,335 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tez.auxservices; + +import java.io.DataOutputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Random; +import java.util.zip.CRC32; +import java.util.zip.CheckedOutputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ChecksumException; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.security.UserGroupInformation; + +import org.apache.tez.runtime.library.common.Constants; +import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord; +import org.junit.Before; +import org.junit.Test; + +import static org.apache.tez.auxservices.IndexCache.INDEX_CACHE_MB; +import static org.junit.Assert.*; + +public class TestIndexCache { + private Configuration conf; + private FileSystem fs; + private Path p; + + @Before + public void setUp() throws IOException { + conf = new Configuration(); + fs = FileSystem.getLocal(conf).getRaw(); + p = new Path(System.getProperty("test.build.data", "/tmp"), + "cache").makeQualified(fs.getUri(), fs.getWorkingDirectory()); + } + + @Test + public void testLRCPolicy() throws Exception { + Random r = new Random(); + long seed = r.nextLong(); + r.setSeed(seed); + System.out.println("seed: " + seed); + fs.delete(p, true); + conf.setInt(INDEX_CACHE_MB, 1); + final int partsPerMap = 1000; + final int bytesPerFile = partsPerMap * 24; + IndexCache cache = new IndexCache(conf); + + // fill cache + int totalsize = bytesPerFile; + for (; totalsize < 1024 * 1024; totalsize += bytesPerFile) { + Path f = new Path(p, Integer.toString(totalsize, 36)); + writeFile(fs, f, totalsize, partsPerMap); + TezIndexRecord rec = cache.getIndexInformation( + Integer.toString(totalsize, 36), r.nextInt(partsPerMap), f, + UserGroupInformation.getCurrentUser().getShortUserName()); + checkRecord(rec, totalsize); + } + + // delete files, ensure cache retains all elem + for (FileStatus stat : fs.listStatus(p)) { + fs.delete(stat.getPath(),true); + } + for (int i = bytesPerFile; i < 1024 * 1024; i += bytesPerFile) { + Path f = new Path(p, Integer.toString(i, 36)); + TezIndexRecord rec = cache.getIndexInformation(Integer.toString(i, 36), + r.nextInt(partsPerMap), f, + UserGroupInformation.getCurrentUser().getShortUserName()); + checkRecord(rec, i); + } + + // push oldest (bytesPerFile) out of cache + Path f = new Path(p, Integer.toString(totalsize, 36)); + writeFile(fs, f, totalsize, partsPerMap); + cache.getIndexInformation(Integer.toString(totalsize, 36), + r.nextInt(partsPerMap), f, + UserGroupInformation.getCurrentUser().getShortUserName()); + fs.delete(f, false); + + // oldest fails to read, or error + boolean fnf = false; + try { + cache.getIndexInformation(Integer.toString(bytesPerFile, 36), + r.nextInt(partsPerMap), new Path(p, Integer.toString(bytesPerFile)), + UserGroupInformation.getCurrentUser().getShortUserName()); + } catch (IOException e) { + if (e.getCause() == null || + !(e.getCause() instanceof FileNotFoundException)) { + throw e; + } + else { + fnf = true; + } + } + if (!fnf) + fail("Failed to push out last entry"); + // should find all the other entries + for (int i = bytesPerFile << 1; i < 1024 * 1024; i += bytesPerFile) { + TezIndexRecord rec = cache.getIndexInformation(Integer.toString(i, 36), + r.nextInt(partsPerMap), new Path(p, Integer.toString(i, 36)), + UserGroupInformation.getCurrentUser().getShortUserName()); + checkRecord(rec, 
i);
+    }
+    TezIndexRecord rec = cache.getIndexInformation(Integer.toString(totalsize, 36),
+        r.nextInt(partsPerMap), f,
+        UserGroupInformation.getCurrentUser().getShortUserName());
+
+    checkRecord(rec, totalsize);
+  }
+
+  @Test
+  public void testBadIndex() throws Exception {
+    final int parts = 30;
+    fs.delete(p, true);
+    conf.setInt(INDEX_CACHE_MB, 1);
+    IndexCache cache = new IndexCache(conf);
+
+    Path f = new Path(p, "badindex");
+    FSDataOutputStream out = fs.create(f, false);
+    CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
+    DataOutputStream dout = new DataOutputStream(iout);
+    for (int i = 0; i < parts; ++i) {
+      for (int j = 0; j < Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
+        if (0 == (i % 3)) {
+          dout.writeLong(i);
+        } else {
+          out.writeLong(i);
+        }
+      }
+    }
+    out.writeLong(iout.getChecksum().getValue());
+    dout.close();
+    try {
+      cache.getIndexInformation("badindex", 7, f,
+          UserGroupInformation.getCurrentUser().getShortUserName());
+      fail("Did not detect bad checksum");
+    } catch (IOException e) {
+      if (!(e.getCause() instanceof ChecksumException)) {
+        throw e;
+      }
+    }
+  }
+
+  @Test
+  public void testInvalidReduceNumberOrLength() throws Exception {
+    fs.delete(p, true);
+    conf.setInt(INDEX_CACHE_MB, 1);
+    final int partsPerMap = 1000;
+    final int bytesPerFile = partsPerMap * 24;
+    IndexCache cache = new IndexCache(conf);
+
+    // fill cache
+    Path feq = new Path(p, "invalidReduceOrPartsPerMap");
+    writeFile(fs, feq, bytesPerFile, partsPerMap);
+
+    // Number of reducers should always be less than partsPerMap, as reducer
+    // numbers start from 0 and there cannot be more reducers than parts
+
+    try {
+      // Number of reducers equal to partsPerMap
+      cache.getIndexInformation("reduceEqualPartsPerMap",
+          partsPerMap, // reduce number == partsPerMap
+          feq, UserGroupInformation.getCurrentUser().getShortUserName());
+      fail("Number of reducers equal to partsPerMap did not fail");
+    } catch (Exception e) {
+      if (!(e instanceof IOException)) {
+        throw e;
+      }
+    }
+
+    try {
+      // Number of reducers more than partsPerMap
+      cache.getIndexInformation(
+          "reduceMorePartsPerMap",
+          partsPerMap + 1, // reduce number > partsPerMap
+          feq, UserGroupInformation.getCurrentUser().getShortUserName());
+      fail("Number of reducers more than partsPerMap did not fail");
+    } catch (Exception e) {
+      if (!(e instanceof IOException)) {
+        throw e;
+      }
+    }
+  }
+
+  @Test
+  public void testRemoveMap() throws Exception {
+    // This test case uses two threads to call getIndexInformation and
+    // removeMap concurrently, to construct a race condition.
+    // This test case may not be repeatable. But on my MacBook this test
+    // fails with probability of 100% on code before MAPREDUCE-2541,
+    // so it is repeatable in practice.
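    // Sketch (not part of this patch): the bytesPerFile = partsPerMap * 24 sizing used by
    // these tests follows from the on-disk index layout, where every partition record is
    // three longs (start offset, raw length, part length), i.e.
    // Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH = 24 bytes; writeFile() then appends one
    // trailing CRC long per file, which bytesPerFile does not count. A hypothetical helper
    // for the full file size, making that arithmetic explicit:
    //
    //   static long indexFileBytes(int partitions) {
    //     // 3 longs per record, plus the checksum long written at the end of the file
    //     return (long) partitions * Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH + Long.BYTES;
    //   }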
+ fs.delete(p, true); + conf.setInt(INDEX_CACHE_MB, 10); + // Make a big file so removeMapThread almost surely runs faster than + // getInfoThread + final int partsPerMap = 100000; + final int bytesPerFile = partsPerMap * 24; + final IndexCache cache = new IndexCache(conf); + + final Path big = new Path(p, "bigIndex"); + final String user = + UserGroupInformation.getCurrentUser().getShortUserName(); + writeFile(fs, big, bytesPerFile, partsPerMap); + + // run multiple times + for (int i = 0; i < 20; ++i) { + Thread getInfoThread = new Thread() { + @Override + public void run() { + try { + cache.getIndexInformation("bigIndex", partsPerMap, big, user); + } catch (Exception e) { + // should not be here + } + } + }; + Thread removeMapThread = new Thread() { + @Override + public void run() { + cache.removeMap("bigIndex"); + } + }; + if (i%2==0) { + getInfoThread.start(); + removeMapThread.start(); + } else { + removeMapThread.start(); + getInfoThread.start(); + } + getInfoThread.join(); + removeMapThread.join(); + assertEquals(true, cache.checkTotalMemoryUsed()); + } + } + + @Test + public void testCreateRace() throws Exception { + fs.delete(p, true); + conf.setInt(INDEX_CACHE_MB, 1); + final int partsPerMap = 1000; + final int bytesPerFile = partsPerMap * 24; + final IndexCache cache = new IndexCache(conf); + + final Path racy = new Path(p, "racyIndex"); + final String user = + UserGroupInformation.getCurrentUser().getShortUserName(); + writeFile(fs, racy, bytesPerFile, partsPerMap); + + // run multiple instances + Thread[] getInfoThreads = new Thread[50]; + for (int i = 0; i < 50; i++) { + getInfoThreads[i] = new Thread() { + @Override + public void run() { + try { + cache.getIndexInformation("racyIndex", partsPerMap, racy, user); + cache.removeMap("racyIndex"); + } catch (Exception e) { + // should not be here + } + } + }; + } + + for (int i = 0; i < 50; i++) { + getInfoThreads[i].start(); + } + + final Thread mainTestThread = Thread.currentThread(); + + Thread timeoutThread = new Thread() { + @Override + public void run() { + try { + Thread.sleep(15000); + mainTestThread.interrupt(); + } catch (InterruptedException ie) { + // we are done; + } + } + }; + + for (int i = 0; i < 50; i++) { + try { + getInfoThreads[i].join(); + } catch (InterruptedException ie) { + // we haven't finished in time. Potential deadlock/race. + fail("Unexpectedly long delay during concurrent cache entry creations"); + } + } + // stop the timeoutThread. If we get interrupted before stopping, there + // must be something wrong, although it wasn't a deadlock. No need to + // catch and swallow. 
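    // Sketch (not part of this patch): with plain JUnit 4 the watchdog thread above could be
    // replaced by Thread.join(long), which returns once the deadline passes even when a
    // worker is stuck, so no interrupt of the main test thread is needed:
    //
    //   for (Thread t : getInfoThreads) {
    //     t.join(15000);
    //     assertFalse("Unexpectedly long delay during concurrent cache entry creations",
    //         t.isAlive());
    //   }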
+ timeoutThread.interrupt(); + } + + private static void checkRecord(TezIndexRecord rec, long fill) { + assertEquals(fill, rec.getStartOffset()); + assertEquals(fill, rec.getRawLength()); + assertEquals(fill, rec.getPartLength()); + } + + private static void writeFile(FileSystem fs, Path f, long fill, int parts) + throws IOException { + FSDataOutputStream out = fs.create(f, false); + CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32()); + DataOutputStream dout = new DataOutputStream(iout); + for (int i = 0; i < parts; ++i) { + for (int j = 0; j < Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) { + dout.writeLong(fill); + } + } + out.writeLong(iout.getChecksum().getValue()); + dout.close(); + } +} diff --git a/tez-plugins/tez-aux-services/src/test/java/org/apache/tez/auxservices/TestShuffleHandler.java b/tez-plugins/tez-aux-services/src/test/java/org/apache/tez/auxservices/TestShuffleHandler.java index b9fd0d2326..d32998afcc 100644 --- a/tez-plugins/tez-aux-services/src/test/java/org/apache/tez/auxservices/TestShuffleHandler.java +++ b/tez-plugins/tez-aux-services/src/test/java/org/apache/tez/auxservices/TestShuffleHandler.java @@ -20,12 +20,16 @@ //import static org.apache.hadoop.test.MetricsAsserts.assertCounter; //import static org.apache.hadoop.test.MetricsAsserts.assertGauge; //import static org.apache.hadoop.test.MetricsAsserts.getMetrics; +import org.apache.hadoop.util.DiskChecker.DiskErrorException; import static org.junit.Assert.assertTrue; -import static org.jboss.netty.buffer.ChannelBuffers.wrappedBuffer; -import static org.jboss.netty.handler.codec.http.HttpResponseStatus.OK; -import static org.jboss.netty.handler.codec.http.HttpVersion.HTTP_1_1; +import static io.netty.buffer.Unpooled.wrappedBuffer; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; import static org.junit.Assume.assumeTrue; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -41,27 +45,31 @@ import java.net.URL; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.zip.CheckedOutputStream; +import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.zip.Checksum; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.io.nativeio.NativeIO; -import org.apache.hadoop.mapred.JobID; -import org.apache.hadoop.mapred.MapTask; -import org.apache.hadoop.mapreduce.TypeConverter; +import org.apache.hadoop.yarn.server.api.AuxiliaryLocalPathHandler; import org.apache.tez.runtime.library.common.security.SecureShuffleUtils; import org.apache.tez.common.security.JobTokenIdentifier; import org.apache.tez.common.security.JobTokenSecretManager; +import org.apache.tez.http.BaseHttpConnection; +import org.apache.tez.http.HttpConnectionParams; +import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; +import 
org.apache.tez.runtime.library.common.shuffle.api.ShuffleHandlerError; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.impl.MetricsSystemImpl; @@ -75,31 +83,40 @@ import org.apache.hadoop.yarn.server.api.ApplicationInitializationContext; import org.apache.hadoop.yarn.server.api.ApplicationTerminationContext; import org.apache.hadoop.yarn.server.records.Version; -import org.jboss.netty.channel.Channel; -import org.jboss.netty.channel.ChannelFuture; -import org.jboss.netty.channel.ChannelHandlerContext; -import org.jboss.netty.channel.ChannelPipeline; -import org.jboss.netty.channel.socket.SocketChannel; -import org.jboss.netty.channel.MessageEvent; -import org.jboss.netty.channel.AbstractChannel; -import org.jboss.netty.handler.codec.http.DefaultHttpResponse; -import org.jboss.netty.handler.codec.http.HttpHeaders; -import org.jboss.netty.handler.codec.http.HttpRequest; -import org.jboss.netty.handler.codec.http.HttpResponse; -import org.jboss.netty.handler.codec.http.HttpResponseStatus; -import org.jboss.netty.handler.codec.http.HttpMethod; +import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord; +import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord; + +import io.netty.channel.Channel; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelPipeline; +import io.netty.channel.socket.SocketChannel; +import io.netty.channel.AbstractChannel; +import io.netty.handler.codec.http.DefaultFullHttpRequest; +import io.netty.handler.codec.http.FullHttpRequest; +import io.netty.handler.codec.http.HttpHeaders; +import io.netty.handler.codec.http.HttpRequest; +import io.netty.handler.codec.http.HttpResponse; +import io.netty.handler.codec.http.HttpResponseStatus; +import io.netty.handler.codec.http.HttpVersion; +import io.netty.handler.codec.http.HttpMethod; import org.junit.Assert; import org.junit.Test; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; -import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class TestShuffleHandler { static final long MiB = 1024 * 1024; private static final Logger LOG = LoggerFactory.getLogger(TestShuffleHandler.class); + private static final File TEST_DIR = new File(System.getProperty("test.build.data"), + TestShuffleHandler.class.getName()).getAbsoluteFile(); + private static final String HADOOP_TMP_DIR = "hadoop.tmp.dir"; + private static final String TEST_PARTITION_DATA_STRING = "0123456789"; + class MockShuffleHandler extends org.apache.tez.auxservices.ShuffleHandler { + private AuxiliaryLocalPathHandler pathHandler = new TestAuxiliaryLocalPathHandler(); @Override protected Shuffle getShuffle(final Configuration conf) { return new Shuffle(conf) { @@ -110,7 +127,7 @@ protected void verifyRequest(String appid, ChannelHandlerContext ctx, } @Override protected MapOutputInfo getMapOutputInfo(String dagId, String mapId, - String jobId, + Range reduceRange, String jobId, String user) throws IOException { // Do nothing. 
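For context on the many ch.write(...) to ch.writeAndFlush(...) substitutions in the hunks below: Netty 4 splits Netty 3's eager write into two steps, so a bare write() only queues the message in the channel's outbound buffer and nothing reaches the socket until flush(). A minimal sketch (the buffer variable names are illustrative, not from this patch):

    // ch is an io.netty.channel.Channel
    ch.write(headerBuf);                            // queued in the outbound buffer only
    ch.write(payloadBuf);                           // still queued, nothing on the wire yet
    ch.flush();                                     // both messages are written to the socket

    // one-message shorthand, as used throughout this patch:
    ChannelFuture f = ch.writeAndFlush(responseBuf);
    f.addListener(ChannelFutureListener.CLOSE);     // e.g. close once the write completes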
@@ -133,17 +150,47 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, new ShuffleHeader("attempt_12345_1_m_1_0", 5678, 5678, 1); DataOutputBuffer dob = new DataOutputBuffer(); header.write(dob); - ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength())); + ch.writeAndFlush(wrappedBuffer(dob.getData(), 0, dob.getLength())); dob = new DataOutputBuffer(); for (int i = 0; i < 100; ++i) { header.write(dob); } - return ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength())); + return ch.writeAndFlush(wrappedBuffer(dob.getData(), 0, dob.getLength())); } }; } + @Override + public AuxiliaryLocalPathHandler getAuxiliaryLocalPathHandler() { + return pathHandler; + } } + private static class TestAuxiliaryLocalPathHandler + implements AuxiliaryLocalPathHandler { + @Override + public Path getLocalPathForRead(String path) throws IOException { + return new Path(TEST_DIR.getAbsolutePath(), path); + } + + @Override + public Path getLocalPathForWrite(String path) throws IOException { + return new Path(TEST_DIR.getAbsolutePath()); + } + + @Override + public Path getLocalPathForWrite(String path, long size) + throws IOException { + return new Path(TEST_DIR.getAbsolutePath()); + } + + @Override + public Iterable getAllLocalPathsForRead(String path) + throws IOException { + ArrayList paths = new ArrayList<>(); + paths.add(new Path(TEST_DIR.getAbsolutePath(), path)); + return paths; + } + } private static class MockShuffleHandler2 extends org.apache.tez.auxservices.ShuffleHandler { boolean socketKeepAlive = false; @@ -154,8 +201,8 @@ protected Shuffle getShuffle(final Configuration conf) { protected void verifyRequest(String appid, ChannelHandlerContext ctx, HttpRequest request, HttpResponse response, URL requestUri) throws IOException { - SocketChannel channel = (SocketChannel)(ctx.getChannel()); - socketKeepAlive = channel.getConfig().isKeepAlive(); + SocketChannel channel = (SocketChannel)(ctx.channel()); + socketKeepAlive = channel.config().isKeepAlive(); } }; } @@ -165,6 +212,52 @@ protected boolean isSocketKeepAlive() { } } + class MockShuffleHandlerWithFatalDiskError extends org.apache.tez.auxservices.ShuffleHandler { + public static final String MESSAGE = + "Could not find application_1234/240/output/attempt_1234_0/file.out.index"; + + private JobTokenSecretManager secretManager = + new JobTokenSecretManager(JobTokenSecretManager.createSecretKey(getSecret().getBytes())); + + protected JobTokenSecretManager getSecretManager(){ + return secretManager; + } + + @Override + protected Shuffle getShuffle(final Configuration conf) { + return new Shuffle(conf) { + @Override + protected void verifyRequest(String appid, ChannelHandlerContext ctx, HttpRequest request, + HttpResponse response, URL requestUri) throws IOException { + super.verifyRequest(appid, ctx, request, response, requestUri); + } + + @Override + protected MapOutputInfo getMapOutputInfo(String dagId, String mapId, Range reduceRange, + String jobId, String user) { + return null; + } + + @Override + protected void populateHeaders(List mapIds, String jobId, String dagId, String user, + Range reduceRange, HttpResponse response, boolean keepAliveParam, + Map infoMap) throws IOException { + throw new DiskErrorException(MESSAGE); + } + + @Override + protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch, String user, + String mapId, Range reduceRange, MapOutputInfo info) throws IOException { + return null; + } + }; + } + + public String getSecret() { + return "secret"; + } + } + /** * Test the validation of 
ShuffleHandler's meta-data's serialization and * de-serialization. @@ -204,6 +297,7 @@ public void testShuffleMetrics() throws Exception { sh.metrics.operationComplete(cf); checkShuffleMetrics(ms, 3*MiB, 1, 1, 0); + sh.close(); } static void checkShuffleMetrics(MetricsSystem ms, long bytes, int failed, @@ -225,9 +319,8 @@ static void checkShuffleMetrics(MetricsSystem ms, long bytes, int failed, */ @Test (timeout = 10000) public void testClientClosesConnection() throws Exception { - final ArrayList failures = new ArrayList(1); - Configuration conf = new Configuration(); - conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); + final AtomicBoolean failureEncountered = new AtomicBoolean(false); + Configuration conf = getInitialConf(); ShuffleHandler shuffleHandler = new ShuffleHandler() { @Override protected Shuffle getShuffle(Configuration conf) { @@ -235,7 +328,7 @@ protected Shuffle getShuffle(Configuration conf) { return new Shuffle(conf) { @Override protected MapOutputInfo getMapOutputInfo(String dagId, String mapId, - String jobId, + Range reduceRange, String jobId, String user) throws IOException { return null; @@ -266,27 +359,25 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, new ShuffleHeader("attempt_12345_1_m_1_0", 5678, 5678, 1); DataOutputBuffer dob = new DataOutputBuffer(); header.write(dob); - ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength())); + ch.writeAndFlush(wrappedBuffer(dob.getData(), 0, dob.getLength())); dob = new DataOutputBuffer(); for (int i = 0; i < 100000; ++i) { header.write(dob); } - return ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength())); + return ch.writeAndFlush(wrappedBuffer(dob.getData(), 0, dob.getLength())); } @Override protected void sendError(ChannelHandlerContext ctx, HttpResponseStatus status) { - if (failures.size() == 0) { - failures.add(new Error()); - ctx.getChannel().close(); + if (failureEncountered.compareAndSet(false, true)) { + ctx.channel().close(); } } @Override protected void sendError(ChannelHandlerContext ctx, String message, HttpResponseStatus status) { - if (failures.size() == 0) { - failures.add(new Error()); - ctx.getChannel().close(); + if (failureEncountered.compareAndSet(false, true)) { + ctx.channel().close(); } } }; @@ -313,9 +404,9 @@ protected void sendError(ChannelHandlerContext ctx, String message, header.readFields(input); input.close(); - shuffleHandler.stop(); + shuffleHandler.close(); Assert.assertTrue("sendError called when client closed connection", - failures.size() == 0); + !failureEncountered.get()); } static class LastSocketAddress { @@ -323,16 +414,15 @@ static class LastSocketAddress { void setAddress(SocketAddress lastAddress) { this.lastAddress = lastAddress; } - SocketAddress getSocketAddres() { + SocketAddress getSocketAddress() { return lastAddress; } } @Test(timeout = 10000) public void testKeepAlive() throws Exception { - final ArrayList failures = new ArrayList(1); - Configuration conf = new Configuration(); - conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); + final AtomicBoolean failureEncountered = new AtomicBoolean(false); + Configuration conf = getInitialConf(); conf.setBoolean(ShuffleHandler.SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED, true); // try setting to -ve keep alive timeout. 
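      // Sketch (not part of this patch): the AtomicBoolean that replaces the old
      // ArrayList "failures" in these tests makes first-failure handling race-safe, since
      // sendError() may now run on any Netty event-loop thread; compareAndSet(false, true)
      // succeeds for exactly one caller:
      //
      //   if (failureEncountered.compareAndSet(false, true)) {
      //     ctx.channel().close();  // first failure closes the channel; later ones are no-ops
      //   }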
conf.setInt(ShuffleHandler.SHUFFLE_CONNECTION_KEEP_ALIVE_TIME_OUT, -100); @@ -345,7 +435,7 @@ protected Shuffle getShuffle(final Configuration conf) { return new Shuffle(conf) { @Override protected MapOutputInfo getMapOutputInfo(String dagId, String mapId, - String jobId, String user) + Range reduceRange, String jobId, String user) throws IOException { return null; } @@ -387,8 +477,7 @@ protected void populateHeaders(List mapIds, String jobId, protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Channel ch, String user, String mapId, Range reduceRange, MapOutputInfo info) throws IOException { - lastSocketAddress.setAddress(ch.getRemoteAddress()); - HttpResponse response = new DefaultHttpResponse(HTTP_1_1, OK); + lastSocketAddress.setAddress(ch.remoteAddress()); // send a shuffle header and a lot of data down the channel // to trigger a broken pipe @@ -396,29 +485,27 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, new ShuffleHeader("attempt_12345_1_m_1_0", 5678, 5678, 1); DataOutputBuffer dob = new DataOutputBuffer(); header.write(dob); - ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength())); + ch.writeAndFlush(wrappedBuffer(dob.getData(), 0, dob.getLength())); dob = new DataOutputBuffer(); for (int i = 0; i < 100000; ++i) { header.write(dob); } - return ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength())); + return ch.writeAndFlush(wrappedBuffer(dob.getData(), 0, dob.getLength())); } @Override protected void sendError(ChannelHandlerContext ctx, HttpResponseStatus status) { - if (failures.size() == 0) { - failures.add(new Error()); - ctx.getChannel().close(); + if (failureEncountered.compareAndSet(false, true)) { + ctx.channel().close(); } } @Override protected void sendError(ChannelHandlerContext ctx, String message, HttpResponseStatus status) { - if (failures.size() == 0) { - failures.add(new Error()); - ctx.getChannel().close(); + if (failureEncountered.compareAndSet(false, true)) { + ctx.channel().close(); } } }; @@ -449,7 +536,7 @@ protected void sendError(ChannelHandlerContext ctx, String message, header.readFields(input); byte[] buffer = new byte[1024]; while (input.read(buffer) != -1) {} - SocketAddress firstAddress = lastSocketAddress.getSocketAddres(); + SocketAddress firstAddress = lastSocketAddress.getSocketAddress(); input.close(); // For keepAlive via URL @@ -471,22 +558,27 @@ protected void sendError(ChannelHandlerContext ctx, String message, header = new ShuffleHeader(); header.readFields(input); input.close(); - SocketAddress secondAddress = lastSocketAddress.getSocketAddres(); + SocketAddress secondAddress = lastSocketAddress.getSocketAddress(); Assert.assertNotNull("Initial shuffle address should not be null", firstAddress); Assert.assertNotNull("Keep-Alive shuffle address should not be null", secondAddress); Assert.assertEquals("Initial shuffle address and keep-alive shuffle " + "address should be the same", firstAddress, secondAddress); + shuffleHandler.close(); } @Test public void testSocketKeepAlive() throws Exception { - Configuration conf = new Configuration(); - conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); + Configuration conf = getInitialConf(); conf.setBoolean(ShuffleHandler.SHUFFLE_CONNECTION_KEEP_ALIVE_ENABLED, true); // try setting to -ve keep alive timeout. 
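      // Sketch (not part of this patch): the keep-alive timeout probed by these tests feeds
      // the server pipeline's IdleStateHandler, whose three int arguments are reader-idle,
      // writer-idle and all-idle times in seconds (0 disables a trigger). After
      // connectionKeepAliveTimeOut seconds without outbound writes, a WRITER_IDLE
      // IdleStateEvent reaches TimeoutHandler, which closes the channel only while its
      // enabledTimeout flag is set:
      //
      //   pipeline.addLast("idle", new IdleStateHandler(0, connectionKeepAliveTimeOut, 0));
      //   pipeline.addLast(TIMEOUT_HANDLER, new TimeoutHandler());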
conf.setInt(ShuffleHandler.SHUFFLE_CONNECTION_KEEP_ALIVE_TIME_OUT, -100); HttpURLConnection conn = null; MockShuffleHandler2 shuffleHandler = new MockShuffleHandler2(); + AuxiliaryLocalPathHandler pathHandler = + mock(AuxiliaryLocalPathHandler.class); + when(pathHandler.getLocalPathForRead(anyString())).thenThrow( + new IOException("Test")); + shuffleHandler.setAuxiliaryLocalPathHandler(pathHandler); try { shuffleHandler.init(conf); shuffleHandler.start(); @@ -510,7 +602,7 @@ public void testSocketKeepAlive() throws Exception { if (conn != null) { conn.disconnect(); } - shuffleHandler.stop(); + shuffleHandler.close(); } } @@ -523,8 +615,7 @@ public void testSocketKeepAlive() throws Exception { @Test (timeout = 10000) public void testIncompatibleShuffleVersion() throws Exception { final int failureNum = 3; - Configuration conf = new Configuration(); - conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); + Configuration conf = getInitialConf(); ShuffleHandler shuffleHandler = new ShuffleHandler(); shuffleHandler.init(conf); shuffleHandler.start(); @@ -545,7 +636,6 @@ public void testIncompatibleShuffleVersion() throws Exception { HttpURLConnection.HTTP_BAD_REQUEST, conn.getResponseCode()); } - shuffleHandler.stop(); shuffleHandler.close(); } @@ -557,8 +647,7 @@ public void testIncompatibleShuffleVersion() throws Exception { @Test (timeout = 10000) public void testMaxConnections() throws Exception { - Configuration conf = new Configuration(); - conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); + Configuration conf = getInitialConf(); conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3); ShuffleHandler shuffleHandler = new ShuffleHandler() { @Override @@ -567,7 +656,7 @@ protected Shuffle getShuffle(Configuration conf) { return new Shuffle(conf) { @Override protected MapOutputInfo getMapOutputInfo(String dagId, String mapId, - String jobId, + Range reduceRange, String jobId, String user) throws IOException { // Do nothing. 
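The cap exercised by testMaxConnections lives in channelActive(), Netty 4's replacement for channelOpen(). Paraphrasing the handler code from earlier in this patch (a sketch, not the exact source): the size check against the shared ChannelGroup is not synchronized across event-loop threads, which is why the test paces its connect() calls.

    @Override
    public void channelActive(ChannelHandlerContext ctx) throws Exception {
      // Not atomic across event loops: a burst of simultaneous connects can race past the check.
      if (maxShuffleConnections > 0 && accepted.size() >= maxShuffleConnections) {
        ctx.channel().close();      // over the limit: refuse before reading any request
        return;
      }
      accepted.add(ctx.channel());  // the ChannelGroup drops entries itself when a channel closes
      super.channelActive(ctx);
    }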
@@ -598,12 +687,12 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, new ShuffleHeader("dummy_header", 5678, 5678, 1); DataOutputBuffer dob = new DataOutputBuffer(); header.write(dob); - ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength())); + ch.writeAndFlush(wrappedBuffer(dob.getData(), 0, dob.getLength())); dob = new DataOutputBuffer(); for (int i=0; i<100000; ++i) { header.write(dob); } - return ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength())); + return ch.writeAndFlush(wrappedBuffer(dob.getData(), 0, dob.getLength())); } }; } @@ -630,6 +719,10 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, // Try to open numerous connections for (int i = 0; i < connAttempts; i++) { + // connections should be made in a bit relaxed way, otherwise + // non-synced channelActive method will mess them up + Thread.sleep(200); + conns[i].connect(); } @@ -653,7 +746,181 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, Assert.fail("Expected a SocketException"); } - shuffleHandler.stop(); + shuffleHandler.close(); + } + + /** + * Validate the ranged fetch works as expected + */ + @Test(timeout = 10000) + public void testRangedFetch() throws IOException { + Configuration conf = getInitialConf(); + conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3); + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "simple"); + UserGroupInformation.setConfiguration(conf); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, TEST_DIR.getAbsolutePath()); + ApplicationId appId = ApplicationId.newInstance(12345, 1); + LOG.info(appId.toString()); + String appAttemptId = "attempt_12345_1_m_1_0"; + String user = "randomUser"; + String reducerIdStart = "0"; + String reducerIdEnd = "1"; + List fileMap = new ArrayList<>(); + createShuffleHandlerFiles(TEST_DIR, user, appId.toString(), appAttemptId, + conf, fileMap); + ShuffleHandler shuffleHandler = getShuffleHandlerWithNoVerify(); + shuffleHandler.init(conf); + try { + shuffleHandler.start(); + DataOutputBuffer outputBuffer = new DataOutputBuffer(); + outputBuffer.reset(); + Token jt = + new Token("identifier".getBytes(), + "password".getBytes(), new Text(user), new Text("shuffleService")); + jt.write(outputBuffer); + shuffleHandler + .initializeApplication(new ApplicationInitializationContext(user, + appId, ByteBuffer.wrap(outputBuffer.getData(), 0, + outputBuffer.getLength()))); + URL url = + new URL( + "http://127.0.0.1:" + + shuffleHandler.getConfig().get( + ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY) + + "/mapOutput?job=job_12345_0001&dag=1&reduce=" + reducerIdStart + "-" + reducerIdEnd + + "&map=attempt_12345_1_m_1_0"); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, + ShuffleHeader.DEFAULT_HTTP_HEADER_NAME); + conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, + ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION); + conn.connect(); + boolean succeeded = false; + try { + DataInputStream is = new DataInputStream(conn.getInputStream()); + int partitionCount = WritableUtils.readVInt(is); + List headers = new ArrayList<>(2); + for (int i = 0; i < partitionCount; i++) { + ShuffleHeader header = new ShuffleHeader(); + header.readFields(is); + Assert.assertEquals("Incorrect map id", "attempt_12345_1_m_1_0", header.getMapId()); + Assert.assertEquals("Incorrect reduce id", i, header.getPartition()); + headers.add(header); + } + for (ShuffleHeader header: headers) { + byte[] bytes = new byte[(int)header.getCompressedLength()]; + 
is.read(bytes); + } + succeeded = true; + // Read one more byte to force EOF + is.readByte(); + Assert.fail("More fetch bytes that expected in stream"); + } catch (EOFException e) { + Assert.assertTrue("Failed to copy ranged fetch", succeeded); + } + + } finally { + shuffleHandler.close(); + FileUtil.fullyDelete(TEST_DIR); + } + } + + /** + * Validate the ranged fetch works as expected for different amount of map attempts and reduce ranges. + */ + @Test(timeout = 30000) + public void testRangedFetchMultipleAttempts() throws IOException { + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/1, /*reduceRange*/1); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/5, /*reduceRange*/1); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/10, /*reduceRange*/1); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/100, /*reduceRange*/1); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/1, /*reduceRange*/5); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/5, /*reduceRange*/5); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/10, /*reduceRange*/5); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/100, /*reduceRange*/5); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/1, /*reduceRange*/10); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/5, /*reduceRange*/10); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/10, /*reduceRange*/10); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/100, /*reduceRange*/10); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/1, /*reduceRange*/100); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/5, /*reduceRange*/100); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/10, /*reduceRange*/100); + runMultiAttemptMultiRangeShuffleTest(/*attemptRange*/100, /*reduceRange*/100); + } + + private void runMultiAttemptMultiRangeShuffleTest(int attemptRange, int reduceRange) throws IOException { + Random random = new Random(); + String user = "randomUser"; + int firstAttempt = random.nextInt(10); + int reducerIdStart = random.nextInt(10); + int reducerIdEnd = reducerIdStart + reduceRange - 1; + + Configuration conf = getInitialConf(); + conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3); + conf.setInt(ShuffleHandler.SHUFFLE_MAX_SESSION_OPEN_FILES, 3); + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "simple"); + UserGroupInformation.setConfiguration(conf); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, TEST_DIR.getAbsolutePath()); + ApplicationId appId = ApplicationId.newInstance(12345, 1); + LOG.info(appId.toString()); + List attemptIds = IntStream.range(firstAttempt, firstAttempt + attemptRange) + .mapToObj(i -> "attempt_12345_1_m_" + i + "_0").collect(Collectors.toList()); + List fileMap = new ArrayList<>(); + for (String attemptId : attemptIds) { + createShuffleHandlerFiles(TEST_DIR, user, appId.toString(), attemptId, conf, fileMap, reducerIdStart, + reducerIdEnd); + } + ShuffleHandler shuffleHandler = getShuffleHandlerWithNoVerify(); + shuffleHandler.init(conf); + try { + shuffleHandler.start(); + DataOutputBuffer outputBuffer = new DataOutputBuffer(); + outputBuffer.reset(); + Token jt = new Token("identifier".getBytes(), "password".getBytes(), + new Text(user), new Text("shuffleService")); + jt.write(outputBuffer); + shuffleHandler.initializeApplication(new ApplicationInitializationContext(user, appId, + ByteBuffer.wrap(outputBuffer.getData(), 0, outputBuffer.getLength()))); + URL url = new URL("http://127.0.0.1:" + 
shuffleHandler.getConfig().get(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY) + + "/mapOutput?job=job_12345_0001&dag=1&reduce=" + reducerIdStart + "-" + reducerIdEnd + "&map=" + + String.join(",", attemptIds)); + LOG.info("Calling shuffle URL: {}", url); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, ShuffleHeader.DEFAULT_HTTP_HEADER_NAME); + conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION); + conn.connect(); + boolean succeeded = false; + try { + DataInputStream is = new DataInputStream(conn.getInputStream()); + for (String attempt : attemptIds) { + int partitionCount = WritableUtils.readVInt(is); + List headers = new ArrayList<>(partitionCount); + for (int i = reducerIdStart; i <= reducerIdEnd; i++) { + ShuffleHeader header = new ShuffleHeader(); + header.readFields(is); + Assert.assertEquals("Incorrect map id", attempt, header.getMapId()); + Assert.assertEquals("Incorrect reduce id", i, header.getPartition()); + headers.add(header); + } + for (ShuffleHeader header : headers) { + byte[] bytes = new byte[(int) header.getCompressedLength()]; + is.read(bytes); + Assert.assertEquals(TEST_PARTITION_DATA_STRING, new String(bytes)); + } + } + succeeded = true; + // Read one more byte to force EOF + is.readByte(); + Assert.fail("More fetch bytes that expected in stream"); + } catch (EOFException e) { + Assert.assertTrue("Failed to copy ranged fetch", succeeded); + } + + } finally { + shuffleHandler.close(); + FileUtil.fullyDelete(TEST_DIR); + } } /** @@ -666,40 +933,21 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, public void testMapFileAccess() throws IOException { // This will run only in NativeIO is enabled as SecureIOUtils need it assumeTrue(NativeIO.isAvailable()); - Configuration conf = new Configuration(); - conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); + Configuration conf = getInitialConf(); conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3); conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos"); UserGroupInformation.setConfiguration(conf); - File absLogDir = new File("target", - TestShuffleHandler.class.getSimpleName() + "LocDir").getAbsoluteFile(); - conf.set(YarnConfiguration.NM_LOCAL_DIRS, absLogDir.getAbsolutePath()); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, TEST_DIR.getAbsolutePath()); ApplicationId appId = ApplicationId.newInstance(12345, 1); LOG.info(appId.toString()); String appAttemptId = "attempt_12345_1_m_1_0"; String user = "randomUser"; String reducerId = "0"; List fileMap = new ArrayList(); - createShuffleHandlerFiles(absLogDir, user, appId.toString(), appAttemptId, + createShuffleHandlerFiles(TEST_DIR, user, appId.toString(), appAttemptId, conf, fileMap); - ShuffleHandler shuffleHandler = new ShuffleHandler() { - - @Override - protected Shuffle getShuffle(Configuration conf) { - // replace the shuffle handler with one stubbed for testing - return new Shuffle(conf) { - - @Override - protected void verifyRequest(String appid, ChannelHandlerContext ctx, - HttpRequest request, HttpResponse response, URL requestUri) - throws IOException { - // Do nothing. 
- } - - }; - } - }; + ShuffleHandler shuffleHandler = getShuffleHandlerWithNoVerify(); shuffleHandler.init(conf); try { shuffleHandler.start(); @@ -743,71 +991,70 @@ protected void verifyRequest(String appid, ChannelHandlerContext ctx, + " did not match expected owner '" + user + "'"; Assert.assertTrue((new String(byteArr)).contains(message)); } finally { - shuffleHandler.stop(); - FileUtil.fullyDelete(absLogDir); + shuffleHandler.close(); + FileUtil.fullyDelete(TEST_DIR); } } - private static void createShuffleHandlerFiles(File logDir, String user, - String appId, String appAttemptId, Configuration conf, - List fileMap) throws IOException { - String attemptDir = - StringUtils.join(Path.SEPARATOR, - new String[] { logDir.getAbsolutePath(), - ShuffleHandler.USERCACHE, user, - ShuffleHandler.APPCACHE, appId,"dag_1/" + "output", - appAttemptId }); + private static void createShuffleHandlerFiles(File logDir, String user, String appId, String appAttemptId, + Configuration conf, List fileMap) throws IOException { + createShuffleHandlerFiles(logDir, user, appId, appAttemptId, conf, fileMap, 0, 1); + } + + private static void createShuffleHandlerFiles(File logDir, String user, String appId, String appAttemptId, + Configuration conf, List fileMap, int reduceStart, int reduceEnd) throws IOException { + String attemptDir = StringUtils.join(Path.SEPARATOR, new String[] { logDir.getAbsolutePath(), + ShuffleHandler.USERCACHE, user, ShuffleHandler.APPCACHE, appId, "dag_1/" + "output", appAttemptId }); File appAttemptDir = new File(attemptDir); appAttemptDir.mkdirs(); - System.out.println(appAttemptDir.getAbsolutePath()); + LOG.info(appAttemptDir.getAbsolutePath()); File indexFile = new File(appAttemptDir, "file.out.index"); fileMap.add(indexFile); - createIndexFile(indexFile, conf); + createIndexFile(indexFile, conf, reduceStart, reduceEnd); File mapOutputFile = new File(appAttemptDir, "file.out"); fileMap.add(mapOutputFile); - createMapOutputFile(mapOutputFile, conf); + createMapOutputFile(mapOutputFile, conf, reduceEnd - reduceStart + 1); } - private static void - createMapOutputFile(File mapOutputFile, Configuration conf) - throws IOException { + private static void createMapOutputFile(File mapOutputFile, Configuration conf, int partitionCount) + throws IOException { FileOutputStream out = new FileOutputStream(mapOutputFile); - out.write("Creating new dummy map output file. Used only for testing" - .getBytes()); + + StringBuilder b = new StringBuilder(partitionCount * TEST_PARTITION_DATA_STRING.length()); + for (int i = 0; i < partitionCount; i++) { + b.append(TEST_PARTITION_DATA_STRING); + } + + out.write(b.toString().getBytes()); out.flush(); out.close(); } - private static void createIndexFile(File indexFile, Configuration conf) + private static void createIndexFile(File indexFile, Configuration conf, int reduceStart, int reduceEnd) throws IOException { if (indexFile.exists()) { - System.out.println("Deleting existing file"); + LOG.info("Deleting existing file"); indexFile.delete(); } - indexFile.createNewFile(); - FSDataOutputStream output = FileSystem.getLocal(conf).getRaw().append( - new Path(indexFile.getAbsolutePath())); Checksum crc = new PureJavaCrc32(); - crc.reset(); - CheckedOutputStream chk = new CheckedOutputStream(output, crc); - String msg = "Writing new index file. 
This file will be used only " + - "for the testing."; - chk.write(Arrays.copyOf(msg.getBytes(), - MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH)); - output.writeLong(chk.getChecksum().getValue()); - output.close(); + TezSpillRecord tezSpillRecord = new TezSpillRecord(reduceEnd + 1); + int offset = 0; + for (int i = reduceStart; i <= reduceEnd; i++) { + tezSpillRecord.putIndex( + new TezIndexRecord(offset, TEST_PARTITION_DATA_STRING.length(), TEST_PARTITION_DATA_STRING.length()), i); + offset += TEST_PARTITION_DATA_STRING.length(); + } + tezSpillRecord.writeToFile(new Path(indexFile.getAbsolutePath()), conf, FileSystem.getLocal(conf).getRaw(), crc); } @Test public void testRecovery() throws IOException { final String user = "someuser"; final ApplicationId appId = ApplicationId.newInstance(12345, 1); - final JobID jobId = JobID.downgrade(TypeConverter.fromYarn(appId)); final File tmpDir = new File(System.getProperty("test.build.data", System.getProperty("java.io.tmpdir")), TestShuffleHandler.class.getName()); - Configuration conf = new Configuration(); - conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); + Configuration conf = getInitialConf(); conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3); ShuffleHandler shuffle = new ShuffleHandler(); // emulate aux services startup with recovery enabled @@ -873,8 +1120,7 @@ public void testRecoveryFromOtherVersions() throws IOException { final File tmpDir = new File(System.getProperty("test.build.data", System.getProperty("java.io.tmpdir")), TestShuffleHandler.class.getName()); - Configuration conf = new Configuration(); - conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); + Configuration conf = getInitialConf(); conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3); ShuffleHandler shuffle = new ShuffleHandler(); // emulate aux services startup with recovery enabled @@ -978,24 +1224,22 @@ private static int getShuffleResponseCode(ShuffleHandler shuffle, @Test(timeout = 100000) public void testGetMapOutputInfo() throws Exception { - final ArrayList failures = new ArrayList(1); - Configuration conf = new Configuration(); - conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); + final AtomicBoolean failureEncountered = new AtomicBoolean(false); + Configuration conf = getInitialConf(); conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3); conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "simple"); UserGroupInformation.setConfiguration(conf); - File absLogDir = new File("target", TestShuffleHandler.class. 
- getSimpleName() + "LocDir").getAbsoluteFile(); - conf.set(YarnConfiguration.NM_LOCAL_DIRS, absLogDir.getAbsolutePath()); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, TEST_DIR.getAbsolutePath()); ApplicationId appId = ApplicationId.newInstance(12345, 1); String appAttemptId = "attempt_12345_1_m_1_0"; String user = "randomUser"; String reducerId = "0"; List fileMap = new ArrayList(); - createShuffleHandlerFiles(absLogDir, user, appId.toString(), appAttemptId, + createShuffleHandlerFiles(TEST_DIR, user, appId.toString(), appAttemptId, conf, fileMap); ShuffleHandler shuffleHandler = new ShuffleHandler() { + private AuxiliaryLocalPathHandler pathHandler = new TestAuxiliaryLocalPathHandler(); @Override protected Shuffle getShuffle(Configuration conf) { // replace the shuffle handler with one stubbed for testing @@ -1019,9 +1263,8 @@ protected void verifyRequest(String appid, @Override protected void sendError(ChannelHandlerContext ctx, String message, HttpResponseStatus status) { - if (failures.size() == 0) { - failures.add(new Error(message)); - ctx.getChannel().close(); + if (failureEncountered.compareAndSet(false, true)) { + ctx.channel().close(); } } @Override @@ -1033,10 +1276,14 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, new ShuffleHeader("attempt_12345_1_m_1_0", 5678, 5678, 1); DataOutputBuffer dob = new DataOutputBuffer(); header.write(dob); - return ch.write(wrappedBuffer(dob.getData(), 0, dob.getLength())); + return ch.writeAndFlush(wrappedBuffer(dob.getData(), 0, dob.getLength())); } }; } + @Override + public AuxiliaryLocalPathHandler getAuxiliaryLocalPathHandler() { + return pathHandler; + } }; shuffleHandler.init(conf); try { @@ -1073,32 +1320,30 @@ protected ChannelFuture sendMapOutput(ChannelHandlerContext ctx, // ignore } Assert.assertEquals("sendError called due to shuffle error", - 0, failures.size()); + false, failureEncountered.get()); } finally { - shuffleHandler.stop(); - FileUtil.fullyDelete(absLogDir); + shuffleHandler.close(); + FileUtil.fullyDelete(TEST_DIR); } } @Test(timeout = 5000) public void testDagDelete() throws Exception { - final ArrayList failures = new ArrayList(1); - Configuration conf = new Configuration(); + final AtomicBoolean failureEncountered = new AtomicBoolean(false); + Configuration conf = getInitialConf(); conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3); - conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "simple"); UserGroupInformation.setConfiguration(conf); - File absLogDir = new File("target", TestShuffleHandler.class. 
- getSimpleName() + "LocDir").getAbsoluteFile(); - conf.set(YarnConfiguration.NM_LOCAL_DIRS, absLogDir.getAbsolutePath()); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, TEST_DIR.getAbsolutePath()); ApplicationId appId = ApplicationId.newInstance(12345, 1); String appAttemptId = "attempt_12345_1_m_1_0"; String user = "randomUser"; List fileMap = new ArrayList(); - createShuffleHandlerFiles(absLogDir, user, appId.toString(), appAttemptId, + createShuffleHandlerFiles(TEST_DIR, user, appId.toString(), appAttemptId, conf, fileMap); ShuffleHandler shuffleHandler = new ShuffleHandler() { + private AuxiliaryLocalPathHandler pathHandler = new TestAuxiliaryLocalPathHandler(); @Override protected Shuffle getShuffle(Configuration conf) { // replace the shuffle handler with one stubbed for testing @@ -1106,13 +1351,16 @@ protected Shuffle getShuffle(Configuration conf) { @Override protected void sendError(ChannelHandlerContext ctx, String message, HttpResponseStatus status) { - if (failures.size() == 0) { - failures.add(new Error(message)); - ctx.getChannel().close(); + if (failureEncountered.compareAndSet(false, true)) { + ctx.channel().close(); } } }; } + @Override + public AuxiliaryLocalPathHandler getAuxiliaryLocalPathHandler() { + return pathHandler; + } }; shuffleHandler.init(conf); try { @@ -1140,7 +1388,7 @@ protected void sendError(ChannelHandlerContext ctx, String message, ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION); String dagDirStr = StringUtils.join(Path.SEPARATOR, - new String[] { absLogDir.getAbsolutePath(), + new String[] { TEST_DIR.getAbsolutePath(), ShuffleHandler.USERCACHE, user, ShuffleHandler.APPCACHE, appId.toString(),"dag_1/"}); File dagDir = new File(dagDirStr); @@ -1153,11 +1401,184 @@ protected void sendError(ChannelHandlerContext ctx, String message, } catch (EOFException e) { // ignore } + Assert.assertEquals("sendError called due to shuffle error", + false, failureEncountered.get()); + } finally { + shuffleHandler.close(); + FileUtil.fullyDelete(TEST_DIR); + } + } + + @Test + public void testVertexShuffleDelete() throws Exception { + final ArrayList failures = new ArrayList(1); + Configuration conf = getInitialConf(); + conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3); + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "simple"); + UserGroupInformation.setConfiguration(conf); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, TEST_DIR.getAbsolutePath()); + ApplicationId appId = ApplicationId.newInstance(12345L, 1); + String appAttemptId = "attempt_12345_0001_1_00_000000_0_10003_0"; + String user = "randomUser"; + List fileMap = new ArrayList(); + String vertexDirStr = StringUtils.join(Path.SEPARATOR, new String[] { TEST_DIR.getAbsolutePath(), + ShuffleHandler.USERCACHE, user, ShuffleHandler.APPCACHE, appId.toString(), "dag_1/output/" + appAttemptId}); + File vertexDir = new File(vertexDirStr); + Assert.assertFalse("vertex directory should not be present", vertexDir.exists()); + createShuffleHandlerFiles(TEST_DIR, user, appId.toString(), appAttemptId, + conf, fileMap); + ShuffleHandler shuffleHandler = new ShuffleHandler() { + private AuxiliaryLocalPathHandler pathHandler = new TestAuxiliaryLocalPathHandler(); + @Override + protected Shuffle getShuffle(Configuration conf) { + // replace the shuffle handler with one stubbed for testing + return new Shuffle(conf) { + @Override + protected void verifyRequest(String appid, ChannelHandlerContext ctx, HttpRequest request, + HttpResponse response, URL requestUri) throws IOException { + // Do nothing. 
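A note on the failure bookkeeping changed above: testGetMapOutputInfo and testDagDelete replace the old ArrayList-based error collection (checked via failures.size()) with an AtomicBoolean, since sendError can fire on a Netty event-loop thread while the test thread reads the flag afterwards. A minimal sketch of the pattern (class and method names here are illustrative):

    import java.util.concurrent.atomic.AtomicBoolean;

    // compareAndSet(false, true) succeeds for exactly one caller, so the
    // channel-close side effect runs at most once even if several event-loop
    // threads report errors concurrently, and failed() is safe to read from
    // the asserting test thread.
    class ErrorLatch {
      private final AtomicBoolean failureEncountered = new AtomicBoolean(false);

      void onError(Runnable closeChannel) {
        if (failureEncountered.compareAndSet(false, true)) {
          closeChannel.run();   // e.g. ctx.channel().close() in the tests
        }
      }

      boolean failed() {
        return failureEncountered.get();
      }
    }
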
+ } + @Override + protected void sendError(ChannelHandlerContext ctx, String message, + HttpResponseStatus status) { + if (failures.size() == 0) { + failures.add(new Error(message)); + ctx.channel().close(); + } + } + }; + } + @Override + public AuxiliaryLocalPathHandler getAuxiliaryLocalPathHandler() { + return pathHandler; + } + }; + shuffleHandler.init(conf); + try { + shuffleHandler.start(); + DataOutputBuffer outputBuffer = new DataOutputBuffer(); + outputBuffer.reset(); + Token jt = + new Token("identifier".getBytes(), + "password".getBytes(), new Text(user), new Text("shuffleService")); + jt.write(outputBuffer); + shuffleHandler + .initializeApplication(new ApplicationInitializationContext(user, + appId, ByteBuffer.wrap(outputBuffer.getData(), 0, + outputBuffer.getLength()))); + URL url = + new URL( + "http://127.0.0.1:" + + shuffleHandler.getConfig().get( + ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY) + + "/mapOutput?vertexAction=delete&job=job_12345_0001&dag=1&vertex=00"); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, + ShuffleHeader.DEFAULT_HTTP_HEADER_NAME); + conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, + ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION); + Assert.assertTrue("Attempt Directory does not exist!", vertexDir.exists()); + conn.connect(); + try { + DataInputStream is = new DataInputStream(conn.getInputStream()); + is.close(); + Assert.assertFalse("Vertex Directory was not deleted", vertexDir.exists()); + } catch (EOFException e) { + fail("Encountered Exception!" + e.getMessage()); + } + } finally { + shuffleHandler.close(); + FileUtil.fullyDelete(TEST_DIR); + } + } + + @Test(timeout = 5000) + public void testFailedTaskAttemptDelete() throws Exception { + final ArrayList failures = new ArrayList(1); + Configuration conf = getInitialConf(); + conf.setInt(ShuffleHandler.MAX_SHUFFLE_CONNECTIONS, 3); + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "simple"); + UserGroupInformation.setConfiguration(conf); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, TEST_DIR.getAbsolutePath()); + ApplicationId appId = ApplicationId.newInstance(12345, 1); + String appAttemptId = "attempt_12345_1_m_1_0"; + String user = "randomUser"; + List fileMap = new ArrayList(); + String taskAttemptDirStr = + StringUtils.join(Path.SEPARATOR, + new String[] {TEST_DIR.getAbsolutePath(), + ShuffleHandler.USERCACHE, user, + ShuffleHandler.APPCACHE, appId.toString(), "dag_1/output/", appAttemptId}); + File taskAttemptDir = new File(taskAttemptDirStr); + Assert.assertFalse("Task Attempt Directory should not exist", taskAttemptDir.exists()); + createShuffleHandlerFiles(TEST_DIR, user, appId.toString(), appAttemptId, + conf, fileMap); + ShuffleHandler shuffleHandler = new ShuffleHandler() { + private AuxiliaryLocalPathHandler pathHandler = new TestAuxiliaryLocalPathHandler(); + @Override + protected Shuffle getShuffle(Configuration conf) { + // replace the shuffle handler with one stubbed for testing + return new Shuffle(conf) { + @Override + protected void verifyRequest(String appid, ChannelHandlerContext ctx, HttpRequest request, + HttpResponse response, URL requestUri) throws IOException { + // Do nothing. 
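The vertex and task-attempt deletion tests drive the same /mapOutput endpoint and differ only in their query parameters. For reference, a sketch of the two URL shapes, using the placeholder ids from these tests (host and port come from the running handler):

    // URL shapes used by the two deletion tests (ids are the test placeholders;
    // hostPort is the ShuffleHandler address, e.g. "127.0.0.1:" + port).
    final class DeleteUrls {
      static String vertexDelete(String hostPort) {
        // deletes the shuffle output of a whole vertex of dag 1
        return "http://" + hostPort
            + "/mapOutput?vertexAction=delete&job=job_12345_0001&dag=1&vertex=00";
      }

      static String taskAttemptDelete(String hostPort) {
        // deletes the output directory of a single task attempt
        return "http://" + hostPort
            + "/mapOutput?taskAttemptAction=delete&job=job_12345_0001&dag=1"
            + "&map=attempt_12345_1_m_1_0";
      }
    }
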
+ } + @Override + protected void sendError(ChannelHandlerContext ctx, String message, + HttpResponseStatus status) { + if (failures.size() == 0) { + failures.add(new Error(message)); + ctx.channel().close(); + } + } + }; + } + @Override + public AuxiliaryLocalPathHandler getAuxiliaryLocalPathHandler() { + return pathHandler; + } + }; + shuffleHandler.init(conf); + try { + shuffleHandler.start(); + DataOutputBuffer outputBuffer = new DataOutputBuffer(); + outputBuffer.reset(); + Token jt = + new Token("identifier".getBytes(), + "password".getBytes(), new Text(user), new Text("shuffleService")); + jt.write(outputBuffer); + shuffleHandler + .initializeApplication(new ApplicationInitializationContext(user, + appId, ByteBuffer.wrap(outputBuffer.getData(), 0, + outputBuffer.getLength()))); + URL url = + new URL( + "http://127.0.0.1:" + + shuffleHandler.getConfig().get( + ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY) + + "/mapOutput?taskAttemptAction=delete&job=job_12345_0001&dag=1&map=" + appAttemptId); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, + ShuffleHeader.DEFAULT_HTTP_HEADER_NAME); + conn.setRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, + ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION); + Assert.assertTrue("Task Attempt Directory does not exist!", taskAttemptDir.exists()); + conn.connect(); + try { + DataInputStream is = new DataInputStream(conn.getInputStream()); + is.close(); + Assert.assertFalse("Task Attempt file was not deleted!", taskAttemptDir.exists()); + } catch (EOFException e) { + // ignore + } Assert.assertEquals("sendError called due to shuffle error", 0, failures.size()); } finally { - shuffleHandler.stop(); - FileUtil.fullyDelete(absLogDir); + shuffleHandler.close(); + FileUtil.fullyDelete(TEST_DIR); } } @@ -1168,39 +1589,31 @@ public void testSendMapCount() throws Exception { final ChannelHandlerContext mockCtx = mock(ChannelHandlerContext.class); - final MessageEvent mockEvt = mock(MessageEvent.class); final Channel mockCh = mock(AbstractChannel.class); - final ChannelPipeline mockPipeline = Mockito.mock(ChannelPipeline.class); + final ChannelPipeline mockPipeline = mock(ChannelPipeline.class); // Mock HttpRequest and ChannelFuture - final HttpRequest mockHttpRequest = createMockHttpRequest(); + final FullHttpRequest httpRequest = createHttpRequest(); final ChannelFuture mockFuture = createMockChannelFuture(mockCh, listenerList); final ShuffleHandler.TimeoutHandler timerHandler = new ShuffleHandler.TimeoutHandler(); // Mock Netty Channel Context and Channel behavior - Mockito.doReturn(mockCh).when(mockCtx).getChannel(); - Mockito.when(mockCh.getPipeline()).thenReturn(mockPipeline); - Mockito.when(mockPipeline.get(Mockito.any(String.class))).thenReturn(timerHandler); - when(mockCtx.getChannel()).thenReturn(mockCh); - Mockito.doReturn(mockFuture).when(mockCh).write(Mockito.any(Object.class)); - when(mockCh.write(Object.class)).thenReturn(mockFuture); - - //Mock MessageEvent behavior - Mockito.doReturn(mockCh).when(mockEvt).getChannel(); - when(mockEvt.getChannel()).thenReturn(mockCh); - Mockito.doReturn(mockHttpRequest).when(mockEvt).getMessage(); + doReturn(mockCh).when(mockCtx).channel(); + when(mockCh.pipeline()).thenReturn(mockPipeline); + when(mockPipeline.get(any(String.class))).thenReturn(timerHandler); + when(mockCtx.channel()).thenReturn(mockCh); + doReturn(mockFuture).when(mockCh).writeAndFlush(any()); + when(mockCh.writeAndFlush(Object.class)).thenReturn(mockFuture); final ShuffleHandler 
sh = new MockShuffleHandler(); - Configuration conf = new Configuration(); - // The Shuffle handler port associated with the service is bound to but not used. - conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); + Configuration conf = getInitialConf(); sh.init(conf); sh.start(); int maxOpenFiles =conf.getInt(ShuffleHandler.SHUFFLE_MAX_SESSION_OPEN_FILES, ShuffleHandler.DEFAULT_SHUFFLE_MAX_SESSION_OPEN_FILES); - sh.getShuffle(conf).messageReceived(mockCtx, mockEvt); + sh.getShuffle(conf).channelRead(mockCtx, httpRequest); assertTrue("Number of Open files should not exceed the configured " + "value!-Not Expected", listenerList.size() <= maxOpenFiles); @@ -1213,12 +1626,58 @@ public void testSendMapCount() throws Exception { sh.close(); } + @Test + public void testShuffleHandlerSendsDiskError() throws Exception { + Configuration conf = getInitialConf(); + + DataInputStream input = null; + MockShuffleHandlerWithFatalDiskError shuffleHandler = + new MockShuffleHandlerWithFatalDiskError(); + try { + shuffleHandler.init(conf); + shuffleHandler.start(); + + String shuffleBaseURL = "http://127.0.0.1:" + + shuffleHandler.getConfig().get(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY); + URL url = new URL( + shuffleBaseURL + "/mapOutput?job=job_12345_1&dag=1&reduce=1&map=attempt_12345_1_m_1_0"); + shuffleHandler.secretManager.addTokenForJob("job_12345_1", + new Token<>("id".getBytes(), shuffleHandler.getSecret().getBytes(), null, null)); + + HttpConnectionParams httpConnectionParams = ShuffleUtils.getHttpConnectionParams(conf); + BaseHttpConnection httpConnection = ShuffleUtils.getHttpConnection(true, url, + httpConnectionParams, "testFetcher", shuffleHandler.secretManager); + + boolean connectSucceeded = httpConnection.connect(); + Assert.assertTrue(connectSucceeded); + + input = httpConnection.getInputStream(); + httpConnection.validate(); + + ShuffleHeader header = new ShuffleHeader(); + header.readFields(input); + + // message is encoded in the shuffle header, and can be checked by fetchers + Assert.assertEquals( + ShuffleHandlerError.DISK_ERROR_EXCEPTION + ": " + MockShuffleHandlerWithFatalDiskError.MESSAGE, + header.getMapId()); + Assert.assertEquals(-1, header.getCompressedLength()); + Assert.assertEquals(-1, header.getUncompressedLength()); + Assert.assertEquals(-1, header.getPartition()); + } finally { + if (input != null) { + input.close(); + } + shuffleHandler.close(); + } + } + public ChannelFuture createMockChannelFuture(Channel mockCh, final List listenerList) { final ChannelFuture mockFuture = mock(ChannelFuture.class); - when(mockFuture.getChannel()).thenReturn(mockCh); - Mockito.doReturn(true).when(mockFuture).isSuccess(); - Mockito.doAnswer(new Answer() { + when(mockFuture.channel()).thenReturn(mockCh); + doReturn(true).when(mockFuture).isSuccess(); + doAnswer(new Answer() { @Override public Object answer(InvocationOnMock invocation) throws Throwable { //Add ReduceMapFileCount listener to a list @@ -1228,23 +1687,76 @@ public Object answer(InvocationOnMock invocation) throws Throwable { invocation.getArguments()[0]); return null; } - }).when(mockFuture).addListener(Mockito.any( + }).when(mockFuture).addListener(any( ShuffleHandler.ReduceMapFileCount.class)); return mockFuture; } - public HttpRequest createMockHttpRequest() { - HttpRequest mockHttpRequest = mock(HttpRequest.class); - Mockito.doReturn(HttpMethod.GET).when(mockHttpRequest).getMethod(); - Mockito.doAnswer(new Answer() { + public FullHttpRequest createHttpRequest() { + String uri = 
"/mapOutput?job=job_12345_1&dag=1&reduce=1"; + for (int i = 0; i < 100; i++) { + uri = uri.concat("&map=attempt_12345_1_m_" + i + "_0"); + } + return new DefaultFullHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.GET, uri); + } + + @Test + public void testConfigPortStatic() throws Exception { + Random rand = new Random(); + int port = rand.nextInt(10) + 50000; + Configuration conf = new Configuration(); + // provide a port for ShuffleHandler + conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, port); + MockShuffleHandler2 shuffleHandler = new MockShuffleHandler2(); + shuffleHandler.serviceInit(conf); + try { + shuffleHandler.serviceStart(); + Assert.assertEquals(port, shuffleHandler.getPort()); + } finally { + shuffleHandler.close(); + } + } + + @Test + public void testConfigPortDynamic() throws Exception { + Configuration conf = getInitialConf(); + MockShuffleHandler2 shuffleHandler = new MockShuffleHandler2(); + shuffleHandler.serviceInit(conf); + try { + shuffleHandler.serviceStart(); + Assert.assertTrue("ShuffleHandler should use a random chosen port", shuffleHandler.getPort() > 0); + } finally { + shuffleHandler.close(); + } + } + + private Configuration getInitialConf() { + Configuration conf = new Configuration(); + conf.set(HADOOP_TMP_DIR, TEST_DIR.getAbsolutePath()); + // 0 as config, should be dynamically chosen by netty + conf.setInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, 0); + return conf; + } + + private ShuffleHandler getShuffleHandlerWithNoVerify() { + return new ShuffleHandler() { + private AuxiliaryLocalPathHandler pathHandler = new TestAuxiliaryLocalPathHandler(); + @Override - public Object answer(InvocationOnMock invocation) throws Throwable { - String uri = "/mapOutput?job=job_12345_1&dag=1&reduce=1"; - for (int i = 0; i < 100; i++) - uri = uri.concat("&map=attempt_12345_1_m_" + i + "_0"); - return uri; + protected Shuffle getShuffle(Configuration conf) { + // replace the shuffle handler with one stubbed for testing + return new Shuffle(conf) { + @Override + protected void verifyRequest(String appid, ChannelHandlerContext ctx, HttpRequest request, + HttpResponse response, URL requestUri) throws IOException { + // Do nothing. 
+ } + }; } - }).when(mockHttpRequest).getUri(); - return mockHttpRequest; + @Override + public AuxiliaryLocalPathHandler getAuxiliaryLocalPathHandler() { + return pathHandler; + } + }; } } diff --git a/tez-plugins/tez-history-parser/pom.xml b/tez-plugins/tez-history-parser/pom.xml index 3eeb911c6b..243a5f8066 100644 --- a/tez-plugins/tez-history-parser/pom.xml +++ b/tez-plugins/tez-history-parser/pom.xml @@ -21,7 +21,7 @@ org.apache.tez tez-plugins - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-history-parser @@ -53,6 +53,10 @@ org.apache.tez tez-yarn-timeline-history + + org.apache.tez + tez-protobuf-history-plugin + org.apache.tez tez-yarn-timeline-history @@ -128,9 +132,13 @@ org.mockito - mockito-all + mockito-core test + + org.apache.commons + commons-collections4 + junit junit @@ -140,6 +148,10 @@ com.sun.jersey jersey-json + + org.apache.hadoop + hadoop-mapreduce-client-shuffle + diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/ATSImportTool.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/ATSImportTool.java index fee226acf3..c909f7aa0d 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/ATSImportTool.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/ATSImportTool.java @@ -20,7 +20,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.base.Strings; import com.sun.jersey.api.client.Client; import com.sun.jersey.api.client.ClientHandlerException; @@ -69,6 +69,7 @@ import java.net.URL; import java.net.URLEncoder; import java.util.Iterator; +import java.util.Objects; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; @@ -242,11 +243,12 @@ private void downloadData(ZipOutputStream zos) throws TezException, JSONExceptio * @throws IOException * @throws TezException * @throws JSONException + * @throws NullPointerException if {@code zos} is {@code null} */ private void downloadJSONArrayFromATS(String url, ZipOutputStream zos, String tag) throws IOException, TezException, JSONException { - Preconditions.checkArgument(zos != null, "ZipOutputStream can not be null"); + Objects.requireNonNull(zos, "ZipOutputStream can not be null"); String baseUrl = url; JSONArray entities; diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/ATSFileParser.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/ATSFileParser.java index fb42129d21..caeb406f21 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/ATSFileParser.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/ATSFileParser.java @@ -18,7 +18,7 @@ package org.apache.tez.history.parser; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Maps; import org.apache.commons.io.IOUtils; import org.apache.tez.dag.api.TezException; @@ -41,6 +41,8 @@ import java.io.InputStream; import java.util.Enumeration; import java.util.Iterator; +import java.util.List; +import java.util.Objects; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; @@ -60,10 +62,10 @@ public class ATSFileParser extends BaseParser implements ATSData { private final File atsZipFile; - public ATSFileParser(File atsZipFile) throws TezException { + public ATSFileParser(List files) throws TezException { 
super(); - Preconditions.checkArgument(atsZipFile.exists(), "Zipfile " + atsZipFile + " does not exist"); - this.atsZipFile = atsZipFile; + Preconditions.checkArgument(checkFiles(files), "Zipfile " + files + " are empty or they don't exist"); + this.atsZipFile = files.get(0); //doesn't support multiple files at the moment } @Override @@ -72,14 +74,12 @@ public DagInfo getDAGData(String dagId) throws TezException { parseATSZipFile(atsZipFile); linkParsedContents(); + addRawDataToDagInfo(); return dagInfo; - } catch (IOException e) { + } catch (IOException | JSONException e) { LOG.error("Error in reading DAG ", e); throw new TezException(e); - } catch (JSONException e) { - LOG.error("Error in parsing DAG ", e); - throw new TezException(e); } catch (InterruptedException e) { throw new TezException(e); } @@ -90,18 +90,17 @@ public DagInfo getDAGData(String dagId) throws TezException { * * @param verticesJson * @throws JSONException + * @throws NullPointerException if {@code verticesJson} is {@code null} */ private void processVertices(JSONArray verticesJson) throws JSONException { //Process vertex information - Preconditions.checkState(verticesJson != null, "Vertex json can not be null"); - if (verticesJson != null) { - LOG.debug("Started parsing vertex"); - for (int i = 0; i < verticesJson.length(); i++) { - VertexInfo vertexInfo = VertexInfo.create(verticesJson.getJSONObject(i)); - vertexList.add(vertexInfo); - } - LOG.debug("Finished parsing vertex"); + Objects.requireNonNull(verticesJson, "Vertex json cannot be null"); + LOG.debug("Started parsing vertex"); + for (int i = 0; i < verticesJson.length(); i++) { + VertexInfo vertexInfo = VertexInfo.create(verticesJson.getJSONObject(i)); + vertexList.add(vertexInfo); } + LOG.debug("Finished parsing vertex"); } /** @@ -109,18 +108,17 @@ private void processVertices(JSONArray verticesJson) throws JSONException { * * @param tasksJson * @throws JSONException + * @throws NullPointerException if {@code verticesJson} is {@code null} */ private void processTasks(JSONArray tasksJson) throws JSONException { //Process Task information - Preconditions.checkState(tasksJson != null, "Task json can not be null"); - if (tasksJson != null) { - LOG.debug("Started parsing task"); - for (int i = 0; i < tasksJson.length(); i++) { - TaskInfo taskInfo = TaskInfo.create(tasksJson.getJSONObject(i)); - taskList.add(taskInfo); - } - LOG.debug("Finished parsing task"); + Objects.requireNonNull(tasksJson, "Task json can not be null"); + LOG.debug("Started parsing task"); + for (int i = 0; i < tasksJson.length(); i++) { + TaskInfo taskInfo = TaskInfo.create(tasksJson.getJSONObject(i)); + taskList.add(taskInfo); } + LOG.debug("Finished parsing task"); } /** @@ -128,18 +126,18 @@ private void processTasks(JSONArray tasksJson) throws JSONException { * * @param taskAttemptsJson * @throws JSONException + * @throws NullPointerException if {@code taskAttemptsJson} is {@code null} */ private void processAttempts(JSONArray taskAttemptsJson) throws JSONException { //Process TaskAttempt information - Preconditions.checkState(taskAttemptsJson != null, "Attempts json can not be null"); - if (taskAttemptsJson != null) { - LOG.debug("Started parsing task attempts"); - for (int i = 0; i < taskAttemptsJson.length(); i++) { - TaskAttemptInfo attemptInfo = TaskAttemptInfo.create(taskAttemptsJson.getJSONObject(i)); - attemptList.add(attemptInfo); - } - LOG.debug("Finished parsing task attempts"); + Objects.requireNonNull(taskAttemptsJson, "Attempts json can not be null"); + LOG.debug("Started 
parsing task attempts"); + for (int i = 0; i < taskAttemptsJson.length(); i++) { + TaskAttemptInfo attemptInfo = + TaskAttemptInfo.create(taskAttemptsJson.getJSONObject(i)); + attemptList.add(attemptInfo); } + LOG.debug("Finished parsing task attempts"); } /** diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/ProtoHistoryParser.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/ProtoHistoryParser.java new file mode 100644 index 0000000000..397a46fde9 --- /dev/null +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/ProtoHistoryParser.java @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
    + * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.history.parser; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.fs.Path; +import org.apache.tez.common.Preconditions; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.api.TezException; +import org.apache.tez.dag.history.logging.proto.HistoryEventProtoJsonConversion; +import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.HistoryEventProto; +import org.apache.tez.dag.history.logging.proto.ProtoMessageReader; +import org.apache.tez.history.parser.datamodel.DagInfo; +import org.codehaus.jettison.json.JSONException; +import org.codehaus.jettison.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Strings; + +/** + * Parser utility to parse data generated by ProtoHistoryLoggingService. + */ +public class ProtoHistoryParser extends SimpleHistoryParser { + private static final Logger LOG = LoggerFactory.getLogger(ProtoHistoryParser.class); + private List protoFiles; + + public ProtoHistoryParser(List files) { + super(files); + this.protoFiles = files; + } + + /** + * Get in-memory representation of DagInfo. + * + * @return DagInfo + * @throws TezException + */ + public DagInfo getDAGData(String dagId) throws TezException { + try { + Preconditions.checkArgument(!Strings.isNullOrEmpty(dagId), "Please provide valid dagId"); + dagId = dagId.trim(); + parseContents(dagId); + linkParsedContents(); + addRawDataToDagInfo(); + return dagInfo; + } catch (IOException | JSONException e) { + LOG.error("Error in reading DAG ", e); + throw new TezException(e); + } + } + + private void parseContents(String dagId) + throws JSONException, FileNotFoundException, TezException, IOException { + JSONObjectSource source = getJsonSource(); + parse(dagId, source); + } + + private JSONObjectSource getJsonSource() throws IOException { + final TezConfiguration conf = new TezConfiguration(); + + Iterator fileIt = protoFiles.iterator(); + + JSONObjectSource source = new JSONObjectSource() { + private HistoryEventProto message = null; + private ProtoMessageReader reader = new ProtoMessageReader<>(conf, + new Path(fileIt.next().getPath()), HistoryEventProto.PARSER); + + @Override + public JSONObject next() throws JSONException { + return HistoryEventProtoJsonConversion.convertToJson(message); + } + + @Override + public boolean hasNext() throws IOException { + try { + message = (HistoryEventProto) reader.readEvent(); + return message != null; + } catch (java.io.EOFException e) { + reader.close(); + if (!fileIt.hasNext()) { + return false; + } else { + reader = new ProtoMessageReader<>(conf, new Path(fileIt.next().getPath()), + HistoryEventProto.PARSER); + try { + message = (HistoryEventProto) reader.readEvent(); + return message != null; + } catch (java.io.EOFException e2) { + return false; + } + } + } + } + + @Override + public void close() { + try { + reader.close(); + } catch (IOException e) { + LOG.warn("error while closing ProtoMessageReader", e); + } + } + }; + return source; + } +} \ No newline at end of file diff --git 
a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/SimpleHistoryParser.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/SimpleHistoryParser.java index 989dd5142b..b57f4f0e65 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/SimpleHistoryParser.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/SimpleHistoryParser.java @@ -17,9 +17,16 @@ */ package org.apache.tez.history.parser; -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import com.google.common.collect.Maps; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Scanner; + +import org.apache.tez.common.ATSConstants; +import org.apache.tez.common.Preconditions; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.history.logging.impl.SimpleHistoryLoggingService; import org.apache.tez.dag.records.TezDAGID; @@ -38,33 +45,34 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Iterator; -import java.util.Map; -import java.util.Scanner; +import com.google.common.base.Strings; +import com.google.common.collect.Maps; /** - * Parser utility to parse data generated by SimpleHistoryLogging to in-memory datamodel provided - * in org.apache.tez.history.parser.datamodel + * Parser utility to parse data generated by SimpleHistoryLogging to in-memory datamodel provided in + * org.apache.tez.history.parser.datamodel. *
    *
    - * Most of the information should be available. Minor info like VersionInfo may not be available, - * as it is not captured in SimpleHistoryLogging. + * Most of the information should be available. Minor info like VersionInfo may not be available, as + * it is not captured in SimpleHistoryLogging. */ public class SimpleHistoryParser extends BaseParser { private static final Logger LOG = LoggerFactory.getLogger(SimpleHistoryParser.class); - private static final String UTF8 = "UTF-8"; + protected static final String UTF8 = "UTF-8"; private final File historyFile; - - public SimpleHistoryParser(File historyFile) { + public SimpleHistoryParser(List files) { super(); - Preconditions.checkArgument(historyFile.exists(), historyFile + " does not exist"); - this.historyFile = historyFile; + Preconditions.checkArgument(checkFiles(files), files + " are empty or they don't exist"); + this.historyFile = files.get(0); //doesn't support multiple files at the moment } + protected interface JSONObjectSource { + boolean hasNext() throws IOException; + JSONObject next() throws JSONException; + void close(); + }; + /** * Get in-memory representation of DagInfo * @@ -77,13 +85,11 @@ public DagInfo getDAGData(String dagId) throws TezException { dagId = dagId.trim(); parseContents(historyFile, dagId); linkParsedContents(); + addRawDataToDagInfo(); return dagInfo; - } catch (IOException e) { + } catch (IOException | JSONException e) { LOG.error("Error in reading DAG ", e); throw new TezException(e); - } catch (JSONException e) { - LOG.error("Error in parsing DAG ", e); - throw new TezException(e); } } @@ -106,18 +112,117 @@ private void populateOtherInfo(JSONObject source, String entityName, } private void parseContents(File historyFile, String dagId) - throws JSONException, FileNotFoundException, TezException { - Scanner scanner = new Scanner(historyFile, UTF8); + throws JSONException, FileNotFoundException, TezException, IOException { + JSONObjectSource source = getJsonSource(); + + parse(dagId, source); + } + + private JSONObjectSource getJsonSource() throws FileNotFoundException { + final Scanner scanner = new Scanner(historyFile, UTF8); scanner.useDelimiter(SimpleHistoryLoggingService.RECORD_SEPARATOR); - JSONObject dagJson = null; + + JSONObjectSource source = new JSONObjectSource() { + @Override + public JSONObject next() throws JSONException { + String line = scanner.next(); + return new JSONObject(line); + } + + @Override + public boolean hasNext() throws IOException { + return scanner.hasNext(); + } + + @Override + public void close() { + scanner.close(); + } + }; + return source; + } + + protected void parse(String dagId, JSONObjectSource source) + throws JSONException, TezException, IOException { Map vertexJsonMap = Maps.newHashMap(); Map taskJsonMap = Maps.newHashMap(); Map attemptJsonMap = Maps.newHashMap(); + + readEventsFromSource(dagId, source, vertexJsonMap, taskJsonMap, attemptJsonMap); + postProcessMaps(vertexJsonMap, taskJsonMap, attemptJsonMap); + } + + protected void postProcessMaps(Map vertexJsonMap, + Map taskJsonMap, Map attemptJsonMap) + throws JSONException { + for (JSONObject jsonObject : vertexJsonMap.values()) { + VertexInfo vertexInfo = VertexInfo.create(jsonObject); + this.vertexList.add(vertexInfo); + LOG.debug("Parsed vertex {}", vertexInfo.getVertexName()); + } + for (JSONObject jsonObject : taskJsonMap.values()) { + TaskInfo taskInfo = TaskInfo.create(jsonObject); + this.taskList.add(taskInfo); + LOG.debug("Parsed task {}", taskInfo.getTaskId()); + } + for (JSONObject 
jsonObject : attemptJsonMap.values()) { + /** + * For converting SimpleHistoryLogging to in-memory representation + * + * We need to get "relatedEntities":[{"entity":"cn055-10.l42scl.hortonworks.com:58690", + * "entitytype":"nodeId"},{"entity":"container_1438652049951_0008_01_000152", + * "entitytype":"containerId"} and populate it in otherInfo object so that in-memory + * representation can parse it correctly + */ + JSONArray relatedEntities = jsonObject.optJSONArray(Constants.RELATED_ENTITIES); + if (relatedEntities == null) { + //This can happen when CONTAINER_EXITED abruptly. (e.g Container failed, exitCode=1) + LOG.debug("entity {} did not have related entities", + jsonObject.optJSONObject(Constants.ENTITY)); + } else { + JSONObject subJsonObject = relatedEntities.optJSONObject(0); + if (subJsonObject != null) { + String nodeId = subJsonObject.optString(Constants.ENTITY_TYPE); + if (!Strings.isNullOrEmpty(nodeId) && nodeId.equalsIgnoreCase(Constants.NODE_ID)) { + //populate it in otherInfo + JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO); + String nodeIdVal = subJsonObject.optString(Constants.ENTITY); + if (otherInfo != null && nodeIdVal != null) { + otherInfo.put(Constants.NODE_ID, nodeIdVal); + } + } + } + + subJsonObject = relatedEntities.optJSONObject(1); + if (subJsonObject != null) { + String containerId = subJsonObject.optString(Constants.ENTITY_TYPE); + if (!Strings.isNullOrEmpty(containerId) && containerId + .equalsIgnoreCase(Constants.CONTAINER_ID)) { + //populate it in otherInfo + JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO); + String containerIdVal = subJsonObject.optString(Constants.ENTITY); + if (otherInfo != null && containerIdVal != null) { + otherInfo.put(Constants.CONTAINER_ID, containerIdVal); + } + } + } + } + TaskAttemptInfo attemptInfo = TaskAttemptInfo.create(jsonObject); + this.attemptList.add(attemptInfo); + LOG.debug("Parsed task attempt {}", attemptInfo.getTaskAttemptId()); + } + } + + protected void readEventsFromSource(String dagId, JSONObjectSource source, + Map vertexJsonMap, Map taskJsonMap, + Map attemptJsonMap) throws JSONException, TezException, IOException{ + JSONObject dagJson = null; TezDAGID tezDAGID = TezDAGID.fromString(dagId); String userName = null; - while (scanner.hasNext()) { - String line = scanner.next(); - JSONObject jsonObject = new JSONObject(line); + + while (source.hasNext()) { + JSONObject jsonObject = source.next(); + String entity = jsonObject.getString(Constants.ENTITY); String entityType = jsonObject.getString(Constants.ENTITY_TYPE); switch (entityType) { @@ -131,9 +236,14 @@ private void parseContents(File historyFile, String dagId) // time etc). 
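readEventsFromSource pulls events through the JSONObjectSource abstraction introduced earlier in this file, so the same loop serves both the Scanner-backed simple history source and the proto-backed one. Assuming the protected nested interface were reachable (e.g. from a subclass or the same package), a minimal in-memory implementation could look like the sketch below, which is handy for exercising parse() without files:

    import java.util.Iterator;
    import java.util.List;
    import org.codehaus.jettison.json.JSONObject;

    // Minimal in-memory source over pre-built event objects; assumes access
    // to the protected SimpleHistoryParser.JSONObjectSource interface.
    class ListJsonObjectSource implements SimpleHistoryParser.JSONObjectSource {
      private final Iterator<JSONObject> events;

      ListJsonObjectSource(List<JSONObject> events) {
        this.events = events.iterator();
      }

      @Override
      public boolean hasNext() {
        return events.hasNext();
      }

      @Override
      public JSONObject next() {
        return events.next();
      }

      @Override
      public void close() {
        // nothing to release for an in-memory list
      }
    }
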
if (dagJson == null) { dagJson = jsonObject; + } else { + if (dagJson.optJSONObject(ATSConstants.OTHER_INFO).optJSONObject(ATSConstants.DAG_PLAN) == null) { + // if DAG_PLAN is not filled already, let's try to fetch it from other + dagJson.getJSONObject(ATSConstants.OTHER_INFO).put(ATSConstants.DAG_PLAN, + jsonObject.getJSONObject(ATSConstants.OTHER_INFO).getJSONObject(ATSConstants.DAG_PLAN)); + } + mergeSubJSONArray(jsonObject, dagJson, Constants.EVENTS); } - JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO); - JSONObject dagOtherInfo = dagJson.getJSONObject(Constants.OTHER_INFO); JSONArray relatedEntities = dagJson.optJSONArray(Constants .RELATED_ENTITIES); //UserName is present in related entities @@ -148,52 +258,58 @@ private void parseContents(File historyFile, String dagId) } } } - populateOtherInfo(otherInfo, dagOtherInfo); + populateOtherInfo(jsonObject.optJSONObject(Constants.OTHER_INFO), + dagJson.getJSONObject(Constants.OTHER_INFO)); break; case Constants.TEZ_VERTEX_ID: String vertexName = entity; TezVertexID tezVertexID = TezVertexID.fromString(vertexName); - if (!tezDAGID.equals(tezVertexID.getDAGId())) { - LOG.warn(vertexName + " does not belong to " + tezDAGID); + if (!tezDAGID.equals(tezVertexID.getDAGID())) { + LOG.warn("{} does not belong to {} ('{}' != '{}')}", vertexName, tezDAGID, tezDAGID, tezVertexID.getDAGID()); continue; } if (!vertexJsonMap.containsKey(vertexName)) { vertexJsonMap.put(vertexName, jsonObject); + } else { + mergeSubJSONArray(jsonObject, vertexJsonMap.get(vertexName), Constants.EVENTS); } - otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO); - populateOtherInfo(otherInfo, vertexName, vertexJsonMap); + populateOtherInfo(jsonObject.optJSONObject(Constants.OTHER_INFO), vertexName, vertexJsonMap); break; case Constants.TEZ_TASK_ID: String taskName = entity; TezTaskID tezTaskID = TezTaskID.fromString(taskName); - if (!tezDAGID.equals(tezTaskID.getVertexID().getDAGId())) { - LOG.warn(taskName + " does not belong to " + tezDAGID); + if (!tezDAGID.equals(tezTaskID.getDAGID())) { + LOG.warn("{} does not belong to {} ('{}' != '{}')}", taskName, tezDAGID, tezDAGID, + tezTaskID.getDAGID()); continue; } if (!taskJsonMap.containsKey(taskName)) { taskJsonMap.put(taskName, jsonObject); + } else { + mergeSubJSONArray(jsonObject, taskJsonMap.get(taskName), Constants.EVENTS); } - otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO); - populateOtherInfo(otherInfo, taskName, taskJsonMap); + populateOtherInfo(jsonObject.optJSONObject(Constants.OTHER_INFO), taskName, taskJsonMap); break; case Constants.TEZ_TASK_ATTEMPT_ID: String taskAttemptName = entity; TezTaskAttemptID tezAttemptId = TezTaskAttemptID.fromString(taskAttemptName); - if (!tezDAGID.equals(tezAttemptId.getTaskID().getVertexID().getDAGId())) { - LOG.warn(taskAttemptName + " does not belong to " + tezDAGID); + if (!tezDAGID.equals(tezAttemptId.getDAGID())) { + LOG.warn("{} does not belong to {} ('{}' != '{}')}", taskAttemptName, tezDAGID, tezDAGID, + tezAttemptId.getDAGID()); continue; } if (!attemptJsonMap.containsKey(taskAttemptName)) { attemptJsonMap.put(taskAttemptName, jsonObject); + } else { + mergeSubJSONArray(jsonObject, attemptJsonMap.get(taskAttemptName), Constants.EVENTS); } - otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO); - populateOtherInfo(otherInfo, taskAttemptName, attemptJsonMap); + populateOtherInfo(jsonObject.optJSONObject(Constants.OTHER_INFO), taskAttemptName, attemptJsonMap); break; default: break; } } - scanner.close(); + source.close(); if 
(dagJson != null) { this.dagInfo = DagInfo.create(dagJson); setUserName(userName); @@ -202,61 +318,18 @@ private void parseContents(File historyFile, String dagId) throw new TezException( "Please provide a valid/complete history log file containing " + dagId); } - for (JSONObject jsonObject : vertexJsonMap.values()) { - VertexInfo vertexInfo = VertexInfo.create(jsonObject); - this.vertexList.add(vertexInfo); - LOG.debug("Parsed vertex {}", vertexInfo.getVertexName()); + } + + private void mergeSubJSONArray(JSONObject source, JSONObject destination, String key) + throws JSONException { + if (source.optJSONArray(key) == null) { + source.put(key, new JSONArray()); } - for (JSONObject jsonObject : taskJsonMap.values()) { - TaskInfo taskInfo = TaskInfo.create(jsonObject); - this.taskList.add(taskInfo); - LOG.debug("Parsed task {}", taskInfo.getTaskId()); + if (destination.optJSONArray(key) == null) { + destination.put(key, new JSONArray()); } - for (JSONObject jsonObject : attemptJsonMap.values()) { - /** - * For converting SimpleHistoryLogging to in-memory representation - * - * We need to get "relatedEntities":[{"entity":"cn055-10.l42scl.hortonworks.com:58690", - * "entitytype":"nodeId"},{"entity":"container_1438652049951_0008_01_000152", - * "entitytype":"containerId"} and populate it in otherInfo object so that in-memory - * representation can parse it correctly - */ - JSONArray relatedEntities = jsonObject.optJSONArray(Constants.RELATED_ENTITIES); - if (relatedEntities == null) { - //This can happen when CONTAINER_EXITED abruptly. (e.g Container failed, exitCode=1) - LOG.debug("entity {} did not have related entities", - jsonObject.optJSONObject(Constants.ENTITY)); - } else { - JSONObject subJsonObject = relatedEntities.optJSONObject(0); - if (subJsonObject != null) { - String nodeId = subJsonObject.optString(Constants.ENTITY_TYPE); - if (!Strings.isNullOrEmpty(nodeId) && nodeId.equalsIgnoreCase(Constants.NODE_ID)) { - //populate it in otherInfo - JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO); - String nodeIdVal = subJsonObject.optString(Constants.ENTITY); - if (otherInfo != null && nodeIdVal != null) { - otherInfo.put(Constants.NODE_ID, nodeIdVal); - } - } - } - - subJsonObject = relatedEntities.optJSONObject(1); - if (subJsonObject != null) { - String containerId = subJsonObject.optString(Constants.ENTITY_TYPE); - if (!Strings.isNullOrEmpty(containerId) && containerId - .equalsIgnoreCase(Constants.CONTAINER_ID)) { - //populate it in otherInfo - JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO); - String containerIdVal = subJsonObject.optString(Constants.ENTITY); - if (otherInfo != null && containerIdVal != null) { - otherInfo.put(Constants.CONTAINER_ID, containerIdVal); - } - } - } - } - TaskAttemptInfo attemptInfo = TaskAttemptInfo.create(jsonObject); - this.attemptList.add(attemptInfo); - LOG.debug("Parsed task attempt {}", attemptInfo.getTaskAttemptId()); + for (int i = 0; i < source.getJSONArray(key).length(); i++) { + destination.getJSONArray(key).put(source.getJSONArray(key).get(i)); } } } \ No newline at end of file diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/BaseInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/BaseInfo.java index 3f9666a950..783f486a15 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/BaseInfo.java +++ 
b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/BaseInfo.java @@ -44,8 +44,20 @@ public abstract class BaseInfo { BaseInfo(JSONObject jsonObject) throws JSONException { final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO); //parse tez counters - tezCounters = Utils.parseTezCountersFromJSON( - otherInfoNode.optJSONObject(Constants.COUNTERS)); + JSONObject countersObj = otherInfoNode.optJSONObject(Constants.COUNTERS); + if (countersObj == null) { + /* + * This is a workaround for formatting differences, where a TaskFinishedEvent's + * counter is a correct json object shown as string, but VertexFinishedEvent's + * counter is an encoded json string, so the latter is interpreted as a String + * while parsing. The issue might be somewhere while converting these event objects + * to proto (HistoryEventProtoConverter). Even though it should be fixed there, + * already generated events should be parsed correctly, hence this workaround. + * Will be investigated in the scope of TEZ-4324. + */ + countersObj = new JSONObject(otherInfoNode.optString(Constants.COUNTERS)); + } + tezCounters = Utils.parseTezCountersFromJSON(countersObj); //parse events eventList = Lists.newArrayList(); diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/BaseParser.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/BaseParser.java index 362dbd9837..9f3881c8b2 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/BaseParser.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/BaseParser.java @@ -18,13 +18,14 @@ package org.apache.tez.history.parser.datamodel; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.dag.records.TezTaskID; +import java.io.File; import java.util.List; import java.util.Map; @@ -44,6 +45,26 @@ public BaseParser() { attemptList = Lists.newLinkedList(); } + + protected boolean checkFiles(List<File> files) { + if (files.isEmpty()) { + return false; + } + for (File file : files) { + if (!file.exists()) { + return false; + } + } + return true; + } + + + protected void addRawDataToDagInfo() { + dagInfo.addMeta("vertices", vertexList); + dagInfo.addMeta("tasks", taskList); + dagInfo.addMeta("taskAttempts", attemptList); + } + /** * link the parsed contents, so that it becomes easier to iterate from DAG-->Task and Task-->DAG.
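* A sketch of the traversal this enables once linking is done (assuming the
* usual accessors on the parsed model, e.g. DagInfo#getVertices and
* VertexInfo#getTasks):
* <pre>
*   for (VertexInfo vertex : dagInfo.getVertices()) {
*     for (TaskInfo task : vertex.getTasks()) {
*       LOG.debug("{} owns {}", vertex.getVertexName(), task.getTaskId());
*     }
*   }
* </pre>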
* e.g Link vertex to dag, task to vertex, attempt to task etc @@ -69,9 +90,8 @@ protected void linkParsedContents() { //Link task to task attempt TezTaskAttemptID taskAttemptId = TezTaskAttemptID.fromString(attemptInfo .getTaskAttemptId()); - VertexInfo vertexInfo = dagInfo.getVertexFromId(taskAttemptId.getTaskID() - .getVertexID().toString()); - Preconditions.checkState(vertexInfo != null, "Vertex " + taskAttemptId.getTaskID() + VertexInfo vertexInfo = dagInfo.getVertexFromId(taskAttemptId.getVertexID().toString()); + Preconditions.checkState(vertexInfo != null, "Vertex " + taskAttemptId .getVertexID().toString() + " is not present in DAG"); TaskInfo taskInfo = vertexInfo.getTask(taskAttemptId.getTaskID().toString()); attemptInfo.setTaskInfo(taskInfo); diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java index 6bd691c8b6..5067ec7aa8 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/DagInfo.java @@ -18,7 +18,7 @@ package org.apache.tez.history.parser.datamodel; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; import com.google.common.collect.LinkedHashMultimap; @@ -28,14 +28,14 @@ import com.google.common.collect.Multimap; import com.google.common.collect.Multimaps; import com.google.common.collect.Ordering; -import org.apache.commons.collections.BidiMap; -import org.apache.commons.collections.bidimap.DualHashBidiMap; +import org.apache.commons.collections4.BidiMap; +import org.apache.commons.collections4.bidimap.DualHashBidiMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.util.StringInterner; import org.apache.tez.client.CallerContext; import org.apache.tez.dag.api.event.VertexState; import org.apache.tez.dag.history.HistoryEventType; +import org.apache.tez.util.StringInterner; import org.codehaus.jettison.json.JSONArray; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; @@ -44,6 +44,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -71,7 +72,7 @@ public class DagInfo extends BaseInfo { private CallerContext callerContext; //VertexID --> VertexName & vice versa - private final BidiMap vertexNameIDMapping; + private final BidiMap vertexNameIDMapping; //edgeId to EdgeInfo mapping private final Map edgeInfoMap; @@ -85,11 +86,13 @@ public class DagInfo extends BaseInfo { private Multimap containerMapping; private Map config; + private Map meta = new HashMap(); + DagInfo(JSONObject jsonObject) throws JSONException { super(jsonObject); vertexNameMap = Maps.newHashMap(); - vertexNameIDMapping = new DualHashBidiMap(); + vertexNameIDMapping = new DualHashBidiMap<>(); edgeInfoMap = Maps.newHashMap(); basicVertexInfoMap = Maps.newHashMap(); containerMapping = LinkedHashMultimap.create(); @@ -97,7 +100,7 @@ public class DagInfo extends BaseInfo { Preconditions.checkArgument(jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase (Constants.TEZ_DAG_ID)); - dagId = StringInterner.weakIntern(jsonObject.getString(Constants.ENTITY)); + dagId = 
StringInterner.intern(jsonObject.getString(Constants.ENTITY)); //Parse additional Info JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO); @@ -137,7 +140,7 @@ public class DagInfo extends BaseInfo { diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS); failedTasks = otherInfoNode.optInt(Constants.NUM_FAILED_TASKS); JSONObject dagPlan = otherInfoNode.optJSONObject(Constants.DAG_PLAN); - name = StringInterner.weakIntern((dagPlan != null) ? (dagPlan.optString(Constants.DAG_NAME)) : null); + name = StringInterner.intern((dagPlan != null) ? (dagPlan.optString(Constants.DAG_NAME)) : null); if (dagPlan != null) { JSONArray vertices = dagPlan.optJSONArray(Constants.VERTICES); if (vertices != null) { @@ -149,7 +152,7 @@ public class DagInfo extends BaseInfo { } else { numVertices = 0; } - status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS)); + status = StringInterner.intern(otherInfoNode.optString(Constants.STATUS)); //parse name id mapping JSONObject vertexIDMappingJson = otherInfoNode.optJSONObject(Constants.VERTEX_NAME_ID_MAPPING); @@ -168,6 +171,10 @@ public static DagInfo create(JSONObject jsonObject) throws JSONException { return dagInfo; } + public void addMeta(String key, Object value) { + meta.put(key, value); + } + private void parseDAGPlan(JSONObject dagPlan) throws JSONException { int version = dagPlan.optInt(Constants.VERSION, 1); parseEdges(dagPlan.optJSONArray(Constants.EDGES)); @@ -320,7 +327,7 @@ void addVertexInfo(VertexInfo vertexInfo) { BasicVertexInfo basicVertexInfo = basicVertexInfoMap.get(vertexInfo.getVertexName()); Preconditions.checkArgument(basicVertexInfo != null, - "VerteName " + vertexInfo.getVertexName() + "VertexName " + vertexInfo.getVertexName() + " not present in DAG's vertices " + basicVertexInfoMap.entrySet()); //populate additional information in VertexInfo @@ -387,6 +394,19 @@ public String toString() { return sb.toString(); } + public String toExtendedString() { + StringBuilder sb = new StringBuilder(); + sb.append(toString()); + + try { + sb.append("\nmeta=").append(new JSONObject(meta).toString(3)); + } catch (JSONException e) { + throw new RuntimeException(e); + } + + return sb.toString(); + } + public Multimap getContainerMapping() { return Multimaps.unmodifiableMultimap(containerMapping); } @@ -607,7 +627,7 @@ public final Multimap getContainersToTaskAttemptMapp return Multimaps.unmodifiableMultimap(containerMapping); } - public final Map getVertexNameIDMapping() { + public final Map getVertexNameIDMapping() { return vertexNameIDMapping; } @@ -630,5 +650,4 @@ public final String getUserName() { final void setUserName(String userName) { this.userName = userName; } - } diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java index 885d74392a..c45fcf5ff3 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskAttemptInfo.java @@ -18,7 +18,7 @@ package org.apache.tez.history.parser.datamodel; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -26,7 +26,6 @@ import org.apache.commons.logging.Log; import 
org.apache.commons.logging.LogFactory; -import org.apache.hadoop.util.StringInterner; import org.apache.tez.common.ATSConstants; import org.apache.tez.common.counters.DAGCounter; import org.apache.tez.common.counters.TaskCounter; @@ -34,6 +33,7 @@ import org.apache.tez.dag.api.oldrecords.TaskAttemptState; import org.apache.tez.dag.history.HistoryEventType; import org.apache.tez.history.parser.utils.Utils; +import org.apache.tez.util.StringInterner; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; @@ -41,6 +41,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Objects; import static org.apache.hadoop.classification.InterfaceStability.Evolving; import static org.apache.hadoop.classification.InterfaceAudience.Public; @@ -51,7 +52,7 @@ public class TaskAttemptInfo extends BaseInfo { private static final Log LOG = LogFactory.getLog(TaskAttemptInfo.class); - private static final String SUCCEEDED = StringInterner.weakIntern(TaskAttemptState.SUCCEEDED.name()); + private static final String SUCCEEDED = TaskAttemptState.SUCCEEDED.name(); private final String taskAttemptId; private final long startTime; @@ -96,7 +97,7 @@ public String getTaskAttemptId() { jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase (Constants.TEZ_TASK_ATTEMPT_ID)); - taskAttemptId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY)); + taskAttemptId = StringInterner.intern(jsonObject.optString(Constants.ENTITY)); //Parse additional Info final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO); @@ -132,15 +133,15 @@ public String getTaskAttemptId() { diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS); creationTime = otherInfoNode.optLong(Constants.CREATION_TIME); - creationCausalTA = StringInterner.weakIntern( + creationCausalTA = StringInterner.intern( otherInfoNode.optString(Constants.CREATION_CAUSAL_ATTEMPT)); allocationTime = otherInfoNode.optLong(Constants.ALLOCATION_TIME); - containerId = StringInterner.weakIntern(otherInfoNode.optString(Constants.CONTAINER_ID)); + containerId = StringInterner.intern(otherInfoNode.optString(Constants.CONTAINER_ID)); String id = otherInfoNode.optString(Constants.NODE_ID); - nodeId = StringInterner.weakIntern((id != null) ? (id.split(":")[0]) : ""); + nodeId = StringInterner.intern((id != null) ? (id.split(":")[0]) : ""); logUrl = otherInfoNode.optString(Constants.COMPLETED_LOGS_URL); - status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS)); + status = StringInterner.intern(otherInfoNode.optString(Constants.STATUS)); container = new Container(containerId, nodeId); if (otherInfoNode.has(Constants.LAST_DATA_EVENTS)) { List eventInfo = Utils.parseDataEventDependencyFromJSON( @@ -154,7 +155,7 @@ public String getTaskAttemptId() { } } terminationCause = StringInterner - .weakIntern(otherInfoNode.optString(ATSConstants.TASK_ATTEMPT_ERROR_ENUM)); + .intern(otherInfoNode.optString(ATSConstants.TASK_ATTEMPT_ERROR_ENUM)); executionTimeInterval = (endTime > startTime) ? 
(endTime - startTime) : 0; } @@ -169,8 +170,7 @@ public int compare(TaskAttemptInfo o1, TaskAttemptInfo o2) { } void setTaskInfo(TaskInfo taskInfo) { - Preconditions.checkArgument(taskInfo != null, "Provide valid taskInfo"); - this.taskInfo = taskInfo; + this.taskInfo = Objects.requireNonNull(taskInfo, "Provide valid taskInfo"); taskInfo.addTaskAttemptInfo(this); } @@ -405,7 +405,7 @@ public String toString() { sb.append("container=").append(getContainer()).append(", "); sb.append("nodeId=").append(getNodeId()).append(", "); sb.append("logURL=").append(getLogURL()).append(", "); - sb.append("status=").append(getStatus()); + sb.append("status=").append(getDetailedStatus()); sb.append("]"); return sb.toString(); } diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java index fb3f232d49..e354f624f6 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/TaskInfo.java @@ -18,7 +18,7 @@ package org.apache.tez.history.parser.datamodel; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.collect.Iterables; @@ -31,9 +31,9 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.util.StringInterner; import org.apache.tez.dag.api.oldrecords.TaskAttemptState; import org.apache.tez.dag.history.HistoryEventType; +import org.apache.tez.util.StringInterner; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; @@ -41,6 +41,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Objects; import static org.apache.hadoop.classification.InterfaceAudience.Public; import static org.apache.hadoop.classification.InterfaceStability.Evolving; @@ -71,7 +72,7 @@ public class TaskInfo extends BaseInfo { jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase (Constants.TEZ_TASK_ID)); - taskId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY)); + taskId = StringInterner.intern(jsonObject.optString(Constants.ENTITY)); //Parse additional Info final JSONObject otherInfoNode = jsonObject.getJSONObject(Constants.OTHER_INFO); @@ -106,10 +107,10 @@ public class TaskInfo extends BaseInfo { endTime = eTime; diagnostics = otherInfoNode.optString(Constants.DIAGNOSTICS); - successfulAttemptId = StringInterner.weakIntern( - otherInfoNode.optString(Constants.SUCCESSFUL_ATTEMPT_ID)); + successfulAttemptId = StringInterner + .intern(otherInfoNode.optString(Constants.SUCCESSFUL_ATTEMPT_ID)); scheduledTime = otherInfoNode.optLong(Constants.SCHEDULED_TIME); - status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS)); + status = StringInterner.intern(otherInfoNode.optString(Constants.STATUS)); } @Override @@ -153,8 +154,7 @@ void addTaskAttemptInfo(TaskAttemptInfo taskAttemptInfo) { } void setVertexInfo(VertexInfo vertexInfo) { - Preconditions.checkArgument(vertexInfo != null, "Provide valid vertexInfo"); - this.vertexInfo = vertexInfo; + this.vertexInfo = Objects.requireNonNull(vertexInfo, "Provide valid vertexInfo"); //link it to vertex vertexInfo.addTaskInfo(this); } @@ -276,7 +276,7 @@ public final TaskAttemptInfo 
getLastTaskAttemptToFinish() { } /** - * Get average task attempt duration. Includes succesful and failed tasks + * Get average task attempt duration. Includes successful and failed tasks * * @return float */ diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java index 0f6831b873..106f2bc729 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/datamodel/VertexInfo.java @@ -18,7 +18,7 @@ package org.apache.tez.history.parser.datamodel; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; import com.google.common.collect.LinkedHashMultimap; @@ -30,9 +30,9 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.util.StringInterner; import org.apache.tez.dag.api.oldrecords.TaskState; import org.apache.tez.dag.history.HistoryEventType; +import org.apache.tez.util.StringInterner; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; @@ -41,6 +41,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Objects; import static org.apache.hadoop.classification.InterfaceAudience.Public; import static org.apache.hadoop.classification.InterfaceStability.Evolving; @@ -91,7 +92,7 @@ public class VertexInfo extends BaseInfo { jsonObject.getString(Constants.ENTITY_TYPE).equalsIgnoreCase (Constants.TEZ_VERTEX_ID)); - vertexId = StringInterner.weakIntern(jsonObject.optString(Constants.ENTITY)); + vertexId = StringInterner.intern(jsonObject.optString(Constants.ENTITY)); taskInfoMap = Maps.newHashMap(); inEdgeList = Lists.newLinkedList(); @@ -149,9 +150,9 @@ public class VertexInfo extends BaseInfo { killedTasks = otherInfoNode.optInt(Constants.NUM_KILLED_TASKS); numFailedTaskAttempts = otherInfoNode.optInt(Constants.NUM_FAILED_TASKS_ATTEMPTS); - vertexName = StringInterner.weakIntern(otherInfoNode.optString(Constants.VERTEX_NAME)); - processorClass = StringInterner.weakIntern(otherInfoNode.optString(Constants.PROCESSOR_CLASS_NAME)); - status = StringInterner.weakIntern(otherInfoNode.optString(Constants.STATUS)); + vertexName = StringInterner.intern(otherInfoNode.optString(Constants.VERTEX_NAME)); + processorClass = StringInterner.intern(otherInfoNode.optString(Constants.PROCESSOR_CLASS_NAME)); + status = StringInterner.intern(otherInfoNode.optString(Constants.STATUS)); } public static VertexInfo create(JSONObject vertexInfoObject) throws @@ -198,8 +199,7 @@ void addOutEdge(EdgeInfo edgeInfo) { } void setDagInfo(DagInfo dagInfo) { - Preconditions.checkArgument(dagInfo != null, "Provide valid dagInfo"); - this.dagInfo = dagInfo; + this.dagInfo = Objects.requireNonNull(dagInfo, "Provide valid dagInfo"); //link vertex to dagInfo dagInfo.addVertexInfo(this); updateEdgeInfo(); @@ -521,9 +521,7 @@ public final TaskInfo getFirstTaskToStart() { } Collections.sort(taskInfoList, new Comparator() { @Override public int compare(TaskInfo o1, TaskInfo o2) { - return (o1.getStartTimeInterval() < o2.getStartTimeInterval()) ? -1 : - ((o1.getStartTimeInterval() == o2.getStartTimeInterval()) ? 
- 0 : 1); + return Long.compare(o1.getStartTimeInterval(), o2.getStartTimeInterval()); } }); return taskInfoList.get(0); @@ -541,9 +539,7 @@ public final TaskInfo getLastTaskToFinish() { } Collections.sort(taskInfoList, new Comparator() { @Override public int compare(TaskInfo o1, TaskInfo o2) { - return (o1.getFinishTimeInterval() > o2.getFinishTimeInterval()) ? -1 : - ((o1.getStartTimeInterval() == o2.getStartTimeInterval()) ? - 0 : 1); + return -1 * Long.compare(o1.getFinishTimeInterval(), o2.getFinishTimeInterval()); } }); return taskInfoList.get(0); @@ -589,8 +585,7 @@ public final long getMaxTaskDuration() { private Ordering orderingOnTimeTaken() { return Ordering.from(new Comparator() { @Override public int compare(TaskInfo o1, TaskInfo o2) { - return (o1.getTimeTaken() < o2.getTimeTaken()) ? -1 : - ((o1.getTimeTaken() == o2.getTimeTaken()) ? 0 : 1); + return Long.compare(o1.getTimeTaken(), o2.getTimeTaken()); } }); } @@ -598,8 +593,7 @@ private Ordering orderingOnTimeTaken() { private Ordering orderingOnStartTime() { return Ordering.from(new Comparator() { @Override public int compare(TaskInfo o1, TaskInfo o2) { - return (o1.getStartTimeInterval() < o2.getStartTimeInterval()) ? -1 : - ((o1.getStartTimeInterval() == o2.getStartTimeInterval()) ? 0 : 1); + return Long.compare(o1.getStartTimeInterval(), o2.getStartTimeInterval()); } }); } @@ -607,8 +601,7 @@ private Ordering orderingOnStartTime() { private Ordering orderingOnAttemptStartTime() { return Ordering.from(new Comparator() { @Override public int compare(TaskAttemptInfo o1, TaskAttemptInfo o2) { - return (o1.getStartTimeInterval() < o2.getStartTimeInterval()) ? -1 : - ((o1.getStartTimeInterval() == o2.getStartTimeInterval()) ? 0 : 1); + return Long.compare(o1.getStartTimeInterval(), o2.getStartTimeInterval()); } }); } diff --git a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/utils/Utils.java b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/utils/Utils.java index aacec8ee12..7cf1e1af6b 100644 --- a/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/utils/Utils.java +++ b/tez-plugins/tez-history-parser/src/main/java/org/apache/tez/history/parser/utils/Utils.java @@ -22,7 +22,6 @@ import com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.util.StringInterner; import org.apache.log4j.ConsoleAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -34,6 +33,7 @@ import org.apache.tez.history.parser.datamodel.Constants; import org.apache.tez.history.parser.datamodel.Event; import org.apache.tez.history.parser.datamodel.TaskAttemptInfo.DataDependencyEvent; +import org.apache.tez.util.StringInterner; import org.codehaus.jettison.json.JSONArray; import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; @@ -42,11 +42,12 @@ import java.util.List; @InterfaceAudience.Private -public class Utils { +public final class Utils { private static final String LOG4J_CONFIGURATION = "log4j.configuration"; private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(Utils.class); + private Utils() {} /** * Parse tez counters from json @@ -106,7 +107,7 @@ public static List parseDataEventDependencyFromJSON(JSONObj for (int i=0; i eventList) thro JSONObject eventNode = eventNodes.optJSONObject(i); final String eventInfo = eventNode.optString(Constants.EVENT_INFO); final String eventType = eventNode.optString(Constants.EVENT_TYPE); - final long 
time = eventNode.optLong(Constants.EVENT_TIME_STAMP); + final long time = eventNode.optLong(Constants.EVENT_TIME_STAMP) == 0 + ? eventNode.optLong(Constants.TIMESTAMP) : eventNode.optLong(Constants.EVENT_TIME_STAMP); Event event = new Event(eventInfo, eventType, time); eventList.add(event); - } } diff --git a/tez-plugins/tez-history-parser/src/test/java/org/apache/tez/history/TestHistoryParser.java b/tez-plugins/tez-history-parser/src/test/java/org/apache/tez/history/TestHistoryParser.java index 92c4ad8683..8a054655e4 100644 --- a/tez-plugins/tez-history-parser/src/test/java/org/apache/tez/history/TestHistoryParser.java +++ b/tez-plugins/tez-history-parser/src/test/java/org/apache/tez/history/TestHistoryParser.java @@ -88,6 +88,7 @@ import java.io.File; import java.io.IOException; import java.io.OutputStreamWriter; +import java.util.Arrays; import java.util.Collection; import java.util.Iterator; import java.util.List; @@ -239,7 +240,7 @@ private DagInfo getDagInfoFromSimpleHistory(String dagId) throws TezException, I File localFile = new File(DOWNLOAD_DIR, HISTORY_TXT); //Now parse via SimpleHistory - SimpleHistoryParser parser = new SimpleHistoryParser(localFile); + SimpleHistoryParser parser = new SimpleHistoryParser(Arrays.asList(localFile)); DagInfo dagInfo = parser.getDAGData(dagId); assertTrue(dagInfo.getDagId().equals(dagId)); return dagInfo; @@ -603,7 +604,7 @@ private DagInfo getDagInfo(String dagId) throws TezException { //Parse downloaded contents File downloadedFile = new File(DOWNLOAD_DIR + Path.SEPARATOR + dagId + ".zip"); - ATSFileParser parser = new ATSFileParser(downloadedFile); + ATSFileParser parser = new ATSFileParser(Arrays.asList(downloadedFile)); DagInfo dagInfo = parser.getDAGData(dagId); assertTrue(dagInfo.getDagId().equals(dagId)); return dagInfo; diff --git a/tez-plugins/tez-protobuf-history-plugin/findbugs-exclude.xml b/tez-plugins/tez-protobuf-history-plugin/findbugs-exclude.xml new file mode 100644 index 0000000000..c91265d650 --- /dev/null +++ b/tez-plugins/tez-protobuf-history-plugin/findbugs-exclude.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tez-plugins/tez-protobuf-history-plugin/pom.xml b/tez-plugins/tez-protobuf-history-plugin/pom.xml new file mode 100644 index 0000000000..d38983d8ff --- /dev/null +++ b/tez-plugins/tez-protobuf-history-plugin/pom.xml @@ -0,0 +1,88 @@ + + + + 4.0.0 + + org.apache.tez + tez-plugins + 0.10.5-SNAPSHOT + + tez-protobuf-history-plugin + + + + org.apache.tez + tez-common + + + org.apache.tez + tez-dag + + + org.apache.hadoop + hadoop-common + + + com.google.protobuf + protobuf-java + + + junit + junit + test + + + org.mockito + mockito-core + test + + + + + + + org.apache.rat + apache-rat-plugin + + + com.github.os72 + protoc-jar-maven-plugin + + + generate-sources + + run + + + com.google.protobuf:protoc:${protobuf.version} + ${protoc.path} + none + + ${basedir}/src/main/proto + + + + ${project.build.directory}/generated-sources/java + + + + + + + + + diff --git a/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/DagManifesFileScanner.java b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/DagManifesFileScanner.java new file mode 100644 index 0000000000..addb148530 --- /dev/null +++ b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/DagManifesFileScanner.java @@ -0,0 +1,225 @@ +/** + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.dag.history.logging.proto; + +import java.io.Closeable; +import java.io.IOException; +import java.security.PrivilegedAction; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.ManifestEntryProto; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Helper class to scan all the dag manifest files to get manifest entries. This class is + * not thread safe. + */ +public class DagManifesFileScanner implements Closeable { + private static final Logger LOG = LoggerFactory.getLogger(DagManifesFileScanner.class); + private static final int SCANNER_OFFSET_VERSION = 2; + private static final int MAX_RETRY = 3; + + private final ObjectMapper mapper = new ObjectMapper(); + private final DatePartitionedLogger manifestLogger; + private final long syncTime; + private final boolean withDoas; + + private String scanDir; + private Map offsets; + private Map retryCount; + private List newFiles; + + private ProtoMessageReader reader; + private String currentFilePath; + + public DagManifesFileScanner(DatePartitionedLogger manifestLogger) { + this.manifestLogger = manifestLogger; + this.syncTime = manifestLogger.getConfig().getLong( + TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_SYNC_WINDOWN_SECS, + TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_SYNC_WINDOWN_SECS_DEFAULT); + this.withDoas = manifestLogger.getConfig().getBoolean( + TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_DOAS, + TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_DOAS_DEFAULT); + this.setOffset(LocalDate.ofEpochDay(0)); + } + + // Update the offset version and checks below to ensure correct versions are supported. + // All public to simplify json conversion. + public static class DagManifestOffset { + public int version; + public String scanDir; + public Map offsets; + public Map retryCount; + } + + public void setOffset(String offset) { + try { + DagManifestOffset dagOffset = mapper.readValue(offset, DagManifestOffset.class); + if (dagOffset.version > SCANNER_OFFSET_VERSION) { + throw new IllegalArgumentException("Version mismatch: " + dagOffset.version); + } + this.scanDir = dagOffset.scanDir; + this.offsets = dagOffset.offsets == null ? new HashMap<>() : dagOffset.offsets; + this.retryCount = dagOffset.retryCount == null ? 
new HashMap<>() : dagOffset.retryCount; + this.newFiles = new ArrayList<>(); + } catch (IOException e) { + throw new IllegalArgumentException("Invalid offset", e); + } + } + + public void setOffset(LocalDate date) { + this.scanDir = manifestLogger.getDirForDate(date); + this.offsets = new HashMap<>(); + this.retryCount = new HashMap<>(); + this.newFiles = new ArrayList<>(); + } + + public String getOffset() { + try { + DagManifestOffset offset = new DagManifestOffset(); + offset.version = SCANNER_OFFSET_VERSION; + offset.scanDir = scanDir; + offset.offsets = offsets; + offset.retryCount = retryCount; + return mapper.writeValueAsString(offset); + } catch (IOException e) { + throw new RuntimeException("Unexpected exception while converting to json.", e); + } + } + + public ManifestEntryProto getNext() throws IOException { + while (true) { + if (reader != null) { + ManifestEntryProto evt = null; + try { + evt = reader.readEvent(); + retryCount.remove(currentFilePath); + } catch (IOException e) { + LOG.error("Error trying to read event from file: {}", currentFilePath, e); + incrementError(currentFilePath); + } + if (evt != null) { + offsets.put(reader.getFilePath().getName(), reader.getOffset()); + return evt; + } else { + IOUtils.closeQuietly(reader); + reader = null; + currentFilePath = null; + } + } + if (!newFiles.isEmpty()) { + this.reader = getNextReader(); + this.currentFilePath = reader != null ? reader.getFilePath().toString() : null; + } else { + if (!loadMore()) { + return null; + } + } + } + } + + private void incrementError(String path) { + int count = retryCount.getOrDefault(path, 0); + retryCount.put(path, count + 1); + } + + private ProtoMessageReader getNextReader() throws IOException { + FileStatus status = newFiles.remove(0); + PrivilegedAction> action = () -> { + try { + return manifestLogger.getReader(status.getPath()); + } catch (IOException e) { + String path = status.getPath().toString(); + LOG.error("Error trying to open file: {}", path, e); + incrementError(path); + return null; + } + }; + if (withDoas) { + UserGroupInformation proxyUser = UserGroupInformation.createProxyUser( + status.getOwner(), UserGroupInformation.getCurrentUser()); + return proxyUser.doAs(action); + } else { + return action.run(); + } + } + + @Override + public void close() throws IOException { + if (reader != null) { + reader.close(); + reader = null; + } + } + + private void filterErrors(List files) { + Iterator iter = files.iterator(); + while (iter.hasNext()) { + FileStatus status = iter.next(); + String path = status.getPath().toString(); + if (retryCount.getOrDefault(path, 0) > MAX_RETRY) { + LOG.warn("Removing file {}, too many errors", path); + iter.remove(); + } + } + } + + private void loadNewFiles(String todayDir) throws IOException { + newFiles = manifestLogger.scanForChangedFiles(scanDir, offsets); + if (!scanDir.equals(todayDir)) { + filterErrors(newFiles); + } + } + + private boolean loadMore() throws IOException { + LocalDateTime now = manifestLogger.getNow(); + LocalDate today = now.toLocalDate(); + String todayDir = manifestLogger.getDirForDate(today); + loadNewFiles(todayDir); + while (newFiles.isEmpty()) { + if (now.getHour() * 3600 + now.getMinute() * 60 + now.getSecond() < syncTime) { + // We are in the delay window for today, do not advance date if we are moving from + // yesterday. 
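+ // e.g. with syncTime = 60 and now = 00:00:30, the guard above holds
+ // (0*3600 + 0*60 + 30 < 60), so a scanner still on yesterday's directory
+ // keeps polling it instead of advancing to today too early.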
+ if (scanDir.equals(manifestLogger.getDirForDate(today.minusDays(1)))) { + return false; + } + } + String nextDir = manifestLogger.getNextDirectory(scanDir); + if (nextDir == null) { + return false; + } + scanDir = nextDir; + offsets = new HashMap<>(); + retryCount = new HashMap<>(); + loadNewFiles(todayDir); + } + return true; + } +} diff --git a/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/DatePartitionedLogger.java b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/DatePartitionedLogger.java new file mode 100644 index 0000000000..ee838646fb --- /dev/null +++ b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/DatePartitionedLogger.java @@ -0,0 +1,201 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.history.logging.proto; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.yarn.util.Clock; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.protobuf.MessageLite; +import com.google.protobuf.Parser; + +/** + * Class to create proto reader and writer for a date partitioned directory structure. + * + * @param <T> The proto message type. + */ +public class DatePartitionedLogger<T extends MessageLite> { + private static final Logger LOG = LoggerFactory.getLogger(DatePartitionedLogger.class); + // Everyone has permission to write, but with sticky set so that delete is restricted. + // This is required, since the path is same for all users and everyone writes into it. + private static final FsPermission DIR_PERMISSION = FsPermission.createImmutable((short)01777); + + // Since the directories have broad permissions restrict the file read access. + private static final FsPermission FILE_UMASK = FsPermission.createImmutable((short)0066); + + private final Parser<T> parser; + private final Path basePath; + private final Configuration conf; + private final Clock clock; + + public DatePartitionedLogger(Parser<T> parser, Path baseDir, Configuration conf, Clock clock) + throws IOException { + this.conf = new Configuration(conf); + this.clock = clock; + this.parser = parser; + createDirIfNotExists(baseDir); + this.basePath = baseDir.getFileSystem(conf).resolvePath(baseDir); + FsPermission.setUMask(this.conf, FILE_UMASK); + } + + private void createDirIfNotExists(Path path) throws IOException { + FileSystem fileSystem = path.getFileSystem(conf); + FileStatus fileStatus = null; + try { + fileStatus = fileSystem.getFileStatus(path); + } catch (FileNotFoundException fnf) { + // ignore: regardless of the outcome of FileSystem.getFileStatus call (exception or returning null), + // we handle the fileStatus == null case later on, so it's safe to ignore this exception now + } + try { + if (fileStatus == null) { + fileSystem.mkdirs(path); + fileSystem.setPermission(path, DIR_PERMISSION); + } else if (!fileStatus.getPermission().equals(DIR_PERMISSION)) { + LOG.info("Permission on path {} is {}, setting it to {}", path, fileStatus.getPermission(), DIR_PERMISSION); + fileSystem.setPermission(path, DIR_PERMISSION); + } + } catch (IOException e) { + // Ignore this exception, if there is a problem it'll fail when trying to read or write. + LOG.warn("Error while trying to set permission: ", e); + } + } + + /** + * Creates a writer for the given fileName, with date as today.
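+ * A sketch of intended use, assuming a logger parameterized with some proto type
+ * T and a writeProto(T) method on the returned writer:
+ * <pre>
+ *   ProtoMessageWriter<T> writer = logger.getWriter("eventsfile");
+ *   writer.writeProto(message);
+ *   writer.close();
+ * </pre>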
+ */ + public ProtoMessageWriter<T> getWriter(String fileName) throws IOException { + Path filePath = getPathForDate(getNow().toLocalDate(), fileName); + return new ProtoMessageWriter<>(conf, filePath, parser); + } + + /** + * Creates a reader for the given filePath, no validation is done. + */ + public ProtoMessageReader<T> getReader(Path filePath) throws IOException { + return new ProtoMessageReader<>(conf, filePath, parser); + } + + /** + * Create a path for the given date and fileName. This can be used to create a reader. + */ + public Path getPathForDate(LocalDate date, String fileName) throws IOException { + Path path = new Path(basePath, getDirForDate(date)); + createDirIfNotExists(path); + return new Path(path, fileName); + } + + public Path getPathForSubdir(String dirName, String fileName) { + return new Path(new Path(basePath, dirName), fileName); + } + + /** + * Extract the date from the directory name; this should be a directory created by this class. + */ + public LocalDate getDateFromDir(String dirName) { + if (!dirName.startsWith("date=")) { + throw new IllegalArgumentException("Invalid directory: " + dirName); + } + return LocalDate.parse(dirName.substring(5), DateTimeFormatter.ISO_LOCAL_DATE); + } + + /** + * Returns the directory name for a given date. + */ + public String getDirForDate(LocalDate date) { + return "date=" + DateTimeFormatter.ISO_LOCAL_DATE.format(date); + } + + /** + * Find the next available directory after the given directory. + */ + public String getNextDirectory(String currentDir) throws IOException { + // Fast check, if the next day directory exists return it. + String nextDate = getDirForDate(getDateFromDir(currentDir).plusDays(1)); + FileSystem fileSystem = basePath.getFileSystem(conf); + if (fileSystem.exists(new Path(basePath, nextDate))) { + return nextDate; + } + // Have to scan the directory to find min date greater than currentDir. + String dirName = null; + RemoteIterator<FileStatus> iter = fileSystem.listStatusIterator(basePath); + while (iter.hasNext()) { + FileStatus status = iter.next(); + String name = status.getPath().getName(); + // String comparison is good enough, since it's of the form date=yyyy-MM-dd + if (name.compareTo(currentDir) > 0 && (dirName == null || name.compareTo(dirName) < 0)) { + dirName = name; + } + } + return dirName; + } + + /** + * Returns new or changed files in the given directory. The offsets are used to find + * changed files. + */ + public List<FileStatus> scanForChangedFiles(String subDir, Map<String, Long> currentOffsets) + throws IOException { + Path dirPath = new Path(basePath, subDir); + FileSystem fileSystem = basePath.getFileSystem(conf); + List<FileStatus> newFiles = new ArrayList<>(); + if (!fileSystem.exists(dirPath)) { + return newFiles; + } + RemoteIterator<FileStatus> iter = fileSystem.listStatusIterator(dirPath); + while (iter.hasNext()) { + FileStatus status = iter.next(); + String fileName = status.getPath().getName(); + Long offset = currentOffsets.get(fileName); + // If the offset was never added or offset < fileSize. + if (offset == null || offset < status.getLen()) { + newFiles.add(status); + } + } + return newFiles; + } + + /** + * Returns the current time, using the underlying clock in UTC time. + */ + public LocalDateTime getNow() { + // Use UTC date to ensure the reader date is the same in all timezones.
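+ // e.g. clock.getTime() == 86_400_000L (exactly one day in millis) yields
+ // 1970-01-02T00:00 here on every host, regardless of the local timezone.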
+ return LocalDateTime.ofEpochSecond(clock.getTime() / 1000, 0, ZoneOffset.UTC); + } + + public Configuration getConfig() { + return conf; + } +} diff --git a/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/HistoryEventProtoConverter.java b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/HistoryEventProtoConverter.java new file mode 100644 index 0000000000..904c165684 --- /dev/null +++ b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/HistoryEventProtoConverter.java @@ -0,0 +1,502 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.history.logging.proto; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.tez.common.ATSConstants; +import org.apache.tez.dag.api.EdgeProperty; +import org.apache.tez.dag.api.TezUncheckedException; +import org.apache.tez.dag.api.oldrecords.TaskAttemptState; +import org.apache.tez.dag.api.records.DAGProtos.CallerContextProto; +import org.apache.tez.dag.app.web.AMWebController; +import org.apache.tez.dag.history.HistoryEvent; +import org.apache.tez.dag.history.events.AMLaunchedEvent; +import org.apache.tez.dag.history.events.AMStartedEvent; +import org.apache.tez.dag.history.events.AppLaunchedEvent; +import org.apache.tez.dag.history.events.ContainerLaunchedEvent; +import org.apache.tez.dag.history.events.ContainerStoppedEvent; +import org.apache.tez.dag.history.events.DAGFinishedEvent; +import org.apache.tez.dag.history.events.DAGInitializedEvent; +import org.apache.tez.dag.history.events.DAGRecoveredEvent; +import org.apache.tez.dag.history.events.DAGStartedEvent; +import org.apache.tez.dag.history.events.DAGSubmittedEvent; +import org.apache.tez.dag.history.events.TaskAttemptFinishedEvent; +import org.apache.tez.dag.history.events.TaskAttemptStartedEvent; +import org.apache.tez.dag.history.events.TaskFinishedEvent; +import org.apache.tez.dag.history.events.TaskStartedEvent; +import org.apache.tez.dag.history.events.VertexConfigurationDoneEvent; +import org.apache.tez.dag.history.events.VertexFinishedEvent; +import org.apache.tez.dag.history.events.VertexInitializedEvent; +import org.apache.tez.dag.history.events.VertexStartedEvent; +import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.HistoryEventProto; +import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.KVPair; +import org.apache.tez.dag.history.utils.DAGUtils; +import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.dag.records.TezTaskID; +import org.apache.tez.dag.records.TezVertexID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Convert history event into HistoryEventProto message. + */ +public class HistoryEventProtoConverter { + private static final Logger log = + LoggerFactory.getLogger(HistoryEventProtoConverter.class); + + private final ObjectMapper mapper = new ObjectMapper(); + + /** + * Convert a given history event to HistoryEventProto message. 
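+ * A sketch of typical use (event source and proto writer wiring elided):
+ * <pre>
+ *   HistoryEventProtoConverter converter = new HistoryEventProtoConverter();
+ *   HistoryEventProto proto = converter.convert(historyEvent);
+ * </pre>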
+ */ + public HistoryEventProto convert(HistoryEvent historyEvent) { + validateEvent(historyEvent); + switch (historyEvent.getEventType()) { + case APP_LAUNCHED: + return convertAppLaunchedEvent((AppLaunchedEvent) historyEvent); + case AM_LAUNCHED: + return convertAMLaunchedEvent((AMLaunchedEvent) historyEvent); + case AM_STARTED: + return convertAMStartedEvent((AMStartedEvent) historyEvent); + case CONTAINER_LAUNCHED: + return convertContainerLaunchedEvent((ContainerLaunchedEvent) historyEvent); + case CONTAINER_STOPPED: + return convertContainerStoppedEvent((ContainerStoppedEvent) historyEvent); + case DAG_SUBMITTED: + return convertDAGSubmittedEvent((DAGSubmittedEvent) historyEvent); + case DAG_INITIALIZED: + return convertDAGInitializedEvent((DAGInitializedEvent) historyEvent); + case DAG_STARTED: + return convertDAGStartedEvent((DAGStartedEvent) historyEvent); + case DAG_FINISHED: + return convertDAGFinishedEvent((DAGFinishedEvent) historyEvent); + case VERTEX_INITIALIZED: + return convertVertexInitializedEvent((VertexInitializedEvent) historyEvent); + case VERTEX_STARTED: + return convertVertexStartedEvent((VertexStartedEvent) historyEvent); + case VERTEX_FINISHED: + return convertVertexFinishedEvent((VertexFinishedEvent) historyEvent); + case TASK_STARTED: + return convertTaskStartedEvent((TaskStartedEvent) historyEvent); + case TASK_FINISHED: + return convertTaskFinishedEvent((TaskFinishedEvent) historyEvent); + case TASK_ATTEMPT_STARTED: + return convertTaskAttemptStartedEvent((TaskAttemptStartedEvent) historyEvent); + case TASK_ATTEMPT_FINISHED: + return convertTaskAttemptFinishedEvent((TaskAttemptFinishedEvent) historyEvent); + case VERTEX_CONFIGURE_DONE: + return convertVertexReconfigureDoneEvent((VertexConfigurationDoneEvent) historyEvent); + case DAG_RECOVERED: + return convertDAGRecoveredEvent((DAGRecoveredEvent) historyEvent); + case VERTEX_COMMIT_STARTED: + case VERTEX_GROUP_COMMIT_STARTED: + case VERTEX_GROUP_COMMIT_FINISHED: + case DAG_COMMIT_STARTED: + case DAG_KILL_REQUEST: + throw new UnsupportedOperationException("Invalid Event, does not support history, eventType=" + + historyEvent.getEventType()); + // Do not add default, if a new event type is added, we'll get a warning for the + // switch. 
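+ // Any event type not matched above falls through to the
+ // UnsupportedOperationException below, so a newly added HistoryEventType value
+ // fails loudly instead of being silently dropped from the proto log.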
+ } + throw new UnsupportedOperationException( + "Unhandled Event, eventType=" + historyEvent.getEventType()); + } + + private void validateEvent(HistoryEvent event) { + if (!event.isHistoryEvent()) { + throw new UnsupportedOperationException( + "Invalid Event, does not support history" + ", eventType=" + event.getEventType()); + } + } + + private HistoryEventProto.Builder makeBuilderForEvent(HistoryEvent event, long time, + TezDAGID dagId, ApplicationId appId, ApplicationAttemptId appAttemptId, TezVertexID vertexId, + TezTaskID taskId, TezTaskAttemptID taskAttemptId, String user) { + HistoryEventProto.Builder builder = HistoryEventProto.newBuilder(); + builder.setEventType(event.getEventType().name()); + builder.setEventTime(time); + if (taskAttemptId != null) { + builder.setTaskAttemptId(taskAttemptId.toString()); + taskId = taskAttemptId.getTaskID(); + } + if (taskId != null) { + builder.setTaskId(taskId.toString()); + vertexId = taskId.getVertexID(); + } + if (vertexId != null) { + builder.setVertexId(vertexId.toString()); + dagId = vertexId.getDAGID(); + } + if (dagId != null) { + builder.setDagId(dagId.toString()); + if (appId == null) { + appId = dagId.getApplicationId(); + } + } + if (appAttemptId != null) { + builder.setAppAttemptId(appAttemptId.toString()); + if (appId == null) { + appId = appAttemptId.getApplicationId(); + } + } + if (appId != null) { + builder.setAppId(appId.toString()); + } + if (user != null) { + builder.setUser(user); + } + return builder; + } + + private void addEventData(HistoryEventProto.Builder builder, String key, String value) { + if (value == null) { + return; + } + builder.addEventData(KVPair.newBuilder().setKey(key).setValue(value)); + } + + private void addEventData(HistoryEventProto.Builder builder, String key, Number value) { + builder.addEventData(KVPair.newBuilder().setKey(key).setValue(value.toString())); + } + + private void addEventData(HistoryEventProto.Builder builder, String key, + Map value) { + try { + builder.addEventData( + KVPair.newBuilder().setKey(key).setValue(mapper.writeValueAsString(value))); + } catch (IOException e) { + log.error("Error converting value for key {} to json: ", key, e); + } + } + + private HistoryEventProto convertAppLaunchedEvent(AppLaunchedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getLaunchTime(), null, + event.getApplicationId(), null, null, null, null, event.getUser()); + // This is ok as long as we do not modify the underlying map. 
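+ // The raw map is only read and serialized to json via addEventData below, so
+ // the unchecked cast stays confined to this call site.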
+ @SuppressWarnings({ "unchecked", "rawtypes" }) + Map confMap = (Map)DAGUtils.convertConfigurationToATSMap(event.getConf()); + addEventData(builder, ATSConstants.CONFIG, confMap); + if (event.getVersion() != null) { + addEventData(builder, ATSConstants.TEZ_VERSION, + DAGUtils.convertTezVersionToATSMap(event.getVersion())); + } + addEventData(builder, ATSConstants.DAG_AM_WEB_SERVICE_VERSION, AMWebController.VERSION); + return builder.build(); + } + + private HistoryEventProto convertAMLaunchedEvent(AMLaunchedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getLaunchTime(), null, + null, event.getApplicationAttemptId(), null, null, null, event.getUser()); + addEventData(builder, ATSConstants.APP_SUBMIT_TIME, event.getAppSubmitTime()); + return builder.build(); + } + + private HistoryEventProto convertAMStartedEvent(AMStartedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getStartTime(), null, + null, event.getApplicationAttemptId(), null, null, null, event.getUser()); + return builder.build(); + } + + private HistoryEventProto convertContainerLaunchedEvent(ContainerLaunchedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getLaunchTime(), null, + null, event.getApplicationAttemptId(), null, null, null, null); + addEventData(builder, ATSConstants.CONTAINER_ID, event.getContainerId().toString()); + return builder.build(); + } + + private HistoryEventProto convertContainerStoppedEvent(ContainerStoppedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getStoppedTime(), null, + null, event.getApplicationAttemptId(), null, null, null, null); + addEventData(builder, ATSConstants.CONTAINER_ID, event.getContainerId().toString()); + addEventData(builder, ATSConstants.EXIT_STATUS, event.getExitStatus()); + addEventData(builder, ATSConstants.FINISH_TIME, event.getStoppedTime()); + return builder.build(); + } + + private HistoryEventProto convertDAGSubmittedEvent(DAGSubmittedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getSubmitTime(), + event.getDAGID(), null, event.getApplicationAttemptId(), null, null, null, + event.getUser()); + addEventData(builder, ATSConstants.DAG_NAME, event.getDAGName()); + if (event.getDAGPlan().hasCallerContext() && + event.getDAGPlan().getCallerContext().hasCallerId()) { + CallerContextProto callerContext = event.getDagPlan().getCallerContext(); + addEventData(builder, ATSConstants.CALLER_CONTEXT_ID, callerContext.getCallerId()); + addEventData(builder, ATSConstants.CALLER_CONTEXT_TYPE, callerContext.getCallerType()); + addEventData(builder, ATSConstants.CALLER_CONTEXT, callerContext.getContext()); + } + if (event.getQueueName() != null) { + addEventData(builder, ATSConstants.DAG_QUEUE_NAME, event.getQueueName()); + } + addEventData(builder, ATSConstants.DAG_AM_WEB_SERVICE_VERSION, AMWebController.VERSION); + addEventData(builder, ATSConstants.IN_PROGRESS_LOGS_URL + "_" + + event.getApplicationAttemptId().getAttemptId(), event.getContainerLogs()); + try { + addEventData(builder, ATSConstants.DAG_PLAN, + DAGUtils.convertDAGPlanToATSMap(event.getDAGPlan())); + } catch (IOException e) { + throw new TezUncheckedException(e); + } + return builder.build(); + } + + private HistoryEventProto convertDAGInitializedEvent(DAGInitializedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getInitTime(), + event.getDAGID(), null, null, null, null, null, event.getUser()); + 
addEventData(builder, ATSConstants.DAG_NAME, event.getDagName()); + + if (event.getVertexNameIDMap() != null) { + Map nameIdStrMap = new TreeMap<>(); + for (Entry entry : event.getVertexNameIDMap().entrySet()) { + nameIdStrMap.put(entry.getKey(), entry.getValue().toString()); + } + addEventData(builder, ATSConstants.VERTEX_NAME_ID_MAPPING, nameIdStrMap); + } + return builder.build(); + } + + private HistoryEventProto convertDAGStartedEvent(DAGStartedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getStartTime(), + event.getDAGID(), null, null, null, null, null, event.getUser()); + + addEventData(builder, ATSConstants.DAG_NAME, event.getDagName()); + addEventData(builder, ATSConstants.STATUS, event.getDagState().name()); + + return builder.build(); + } + + private HistoryEventProto convertDAGFinishedEvent(DAGFinishedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getFinishTime(), + event.getDAGID(), null, event.getApplicationAttemptId(), null, null, null, + event.getUser()); + addEventData(builder, ATSConstants.DAG_NAME, event.getDagName()); + if (event.getDAGPlan().hasCallerContext()) { + if (event.getDAGPlan().getCallerContext().hasCallerType()) { + addEventData(builder, ATSConstants.CALLER_CONTEXT_TYPE, + event.getDAGPlan().getCallerContext().getCallerType()); + } + if (event.getDAGPlan().getCallerContext().hasCallerId()) { + addEventData(builder, ATSConstants.CALLER_CONTEXT_ID, + event.getDAGPlan().getCallerContext().getCallerId()); + } + } + addEventData(builder, ATSConstants.START_TIME, event.getStartTime()); + addEventData(builder, ATSConstants.TIME_TAKEN, (event.getFinishTime() - event.getStartTime())); + addEventData(builder, ATSConstants.STATUS, event.getState().name()); + addEventData(builder, ATSConstants.DIAGNOSTICS, event.getDiagnostics()); + addEventData(builder, ATSConstants.COMPLETION_APPLICATION_ATTEMPT_ID, + event.getApplicationAttemptId().toString()); + addEventData(builder, ATSConstants.COUNTERS, + DAGUtils.convertCountersToATSMap(event.getTezCounters())); + Map dagTaskStats = event.getDagTaskStats(); + if (dagTaskStats != null) { + for (Entry entry : dagTaskStats.entrySet()) { + addEventData(builder, entry.getKey(), entry.getValue()); + } + } + return builder.build(); + } + + private HistoryEventProto convertTaskAttemptStartedEvent(TaskAttemptStartedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getStartTime(), + null, null, null, null, null, event.getTaskAttemptID(), null); + if (event.getInProgressLogsUrl() != null) { + addEventData(builder, ATSConstants.IN_PROGRESS_LOGS_URL, event.getInProgressLogsUrl()); + } + if (event.getCompletedLogsUrl() != null) { + addEventData(builder, ATSConstants.COMPLETED_LOGS_URL, event.getCompletedLogsUrl()); + } + addEventData(builder, ATSConstants.NODE_ID, event.getNodeId().toString()); + addEventData(builder, ATSConstants.NODE_HTTP_ADDRESS, event.getNodeHttpAddress()); + addEventData(builder, ATSConstants.CONTAINER_ID, event.getContainerId().toString()); + addEventData(builder, ATSConstants.STATUS, TaskAttemptState.RUNNING.name()); + + return builder.build(); + } + + private HistoryEventProto convertTaskAttemptFinishedEvent(TaskAttemptFinishedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getFinishTime(), + null, null, null, null, null, event.getTaskAttemptID(), null); + + addEventData(builder, ATSConstants.STATUS, event.getState().name()); + if (event.getTaskFailureType() != null) 
{ + addEventData(builder, ATSConstants.TASK_FAILURE_TYPE, event.getTaskFailureType().name()); + } + + addEventData(builder, ATSConstants.CREATION_TIME, event.getCreationTime()); + addEventData(builder, ATSConstants.ALLOCATION_TIME, event.getAllocationTime()); + addEventData(builder, ATSConstants.START_TIME, event.getStartTime()); + + if (event.getCreationCausalTA() != null) { + addEventData(builder, ATSConstants.CREATION_CAUSAL_ATTEMPT, + event.getCreationCausalTA().toString()); + } + addEventData(builder, ATSConstants.TIME_TAKEN, (event.getFinishTime() - event.getStartTime())); + addEventData(builder, ATSConstants.STATUS, event.getState().name()); + + if (event.getTaskAttemptError() != null) { + addEventData(builder, ATSConstants.TASK_ATTEMPT_ERROR_ENUM, + event.getTaskAttemptError().name()); + } + addEventData(builder, ATSConstants.DIAGNOSTICS, event.getDiagnostics()); + addEventData(builder, ATSConstants.COUNTERS, + DAGUtils.convertCountersToATSMap(event.getCounters())); + if (event.getDataEvents() != null && !event.getDataEvents().isEmpty()) { + addEventData(builder, ATSConstants.LAST_DATA_EVENTS, + DAGUtils.convertDataEventDependecyInfoToATS(event.getDataEvents())); + } + if (event.getNodeId() != null) { + addEventData(builder, ATSConstants.NODE_ID, event.getNodeId().toString()); + } + if (event.getContainerId() != null) { + addEventData(builder, ATSConstants.CONTAINER_ID, event.getContainerId().toString()); + } + if (event.getInProgressLogsUrl() != null) { + addEventData(builder, ATSConstants.IN_PROGRESS_LOGS_URL, event.getInProgressLogsUrl()); + } + if (event.getCompletedLogsUrl() != null) { + addEventData(builder, ATSConstants.COMPLETED_LOGS_URL, event.getCompletedLogsUrl()); + } + if (event.getNodeHttpAddress() != null) { + addEventData(builder, ATSConstants.NODE_HTTP_ADDRESS, event.getNodeHttpAddress()); + } + + return builder.build(); + } + + private HistoryEventProto convertTaskFinishedEvent(TaskFinishedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getFinishTime(), + null, null, null, null, event.getTaskID(), null, null); + + addEventData(builder, ATSConstants.TIME_TAKEN, (event.getFinishTime() - event.getStartTime())); + addEventData(builder, ATSConstants.STATUS, event.getState().name()); + addEventData(builder, ATSConstants.NUM_FAILED_TASKS_ATTEMPTS, event.getNumFailedAttempts()); + if (event.getSuccessfulAttemptID() != null) { + addEventData(builder, ATSConstants.SUCCESSFUL_ATTEMPT_ID, + event.getSuccessfulAttemptID().toString()); + } + + addEventData(builder, ATSConstants.DIAGNOSTICS, event.getDiagnostics()); + addEventData(builder, ATSConstants.COUNTERS, + DAGUtils.convertCountersToATSMap(event.getTezCounters())); + + return builder.build(); + } + + private HistoryEventProto convertTaskStartedEvent(TaskStartedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getStartTime(), + null, null, null, null, event.getTaskID(), null, null); + + addEventData(builder, ATSConstants.SCHEDULED_TIME, event.getScheduledTime()); + addEventData(builder, ATSConstants.STATUS, event.getState().name()); + + return builder.build(); + } + + private HistoryEventProto convertVertexFinishedEvent(VertexFinishedEvent event) { + HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getFinishTime(), + null, null, null, event.getVertexID(), null, null, null); + + addEventData(builder, ATSConstants.STATUS, event.getState().name()); + addEventData(builder, ATSConstants.VERTEX_NAME, event.getVertexName()); + 
addEventData(builder, ATSConstants.TIME_TAKEN, (event.getFinishTime() - event.getStartTime()));
+    addEventData(builder, ATSConstants.DIAGNOSTICS, event.getDiagnostics());
+    addEventData(builder, ATSConstants.COUNTERS,
+        DAGUtils.convertCountersToATSMap(event.getTezCounters()));
+    addEventData(builder, ATSConstants.STATS,
+        DAGUtils.convertVertexStatsToATSMap(event.getVertexStats()));
+    if (event.getServicePluginInfo() != null) {
+      addEventData(builder, ATSConstants.SERVICE_PLUGIN,
+          DAGUtils.convertServicePluginToATSMap(event.getServicePluginInfo()));
+    }
+
+    final Map<String, Integer> vertexTaskStats = event.getVertexTaskStats();
+    if (vertexTaskStats != null) {
+      for (Entry<String, Integer> entry : vertexTaskStats.entrySet()) {
+        addEventData(builder, entry.getKey(), entry.getValue());
+      }
+    }
+
+    return builder.build();
+  }
+
+  private HistoryEventProto convertVertexInitializedEvent(VertexInitializedEvent event) {
+    HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getInitedTime(),
+        null, null, null, event.getVertexID(), null, null, null);
+    addEventData(builder, ATSConstants.VERTEX_NAME, event.getVertexName());
+    addEventData(builder, ATSConstants.INIT_REQUESTED_TIME, event.getInitRequestedTime());
+    addEventData(builder, ATSConstants.INIT_TIME, event.getInitedTime());
+    addEventData(builder, ATSConstants.NUM_TASKS, event.getNumTasks());
+    addEventData(builder, ATSConstants.PROCESSOR_CLASS_NAME, event.getProcessorName());
+    if (event.getServicePluginInfo() != null) {
+      addEventData(builder, ATSConstants.SERVICE_PLUGIN,
+          DAGUtils.convertServicePluginToATSMap(event.getServicePluginInfo()));
+    }
+
+    return builder.build();
+  }
+
+  private HistoryEventProto convertVertexStartedEvent(VertexStartedEvent event) {
+    HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getStartTime(),
+        null, null, null, event.getVertexID(), null, null, null);
+    addEventData(builder, ATSConstants.START_REQUESTED_TIME, event.getStartRequestedTime());
+    addEventData(builder, ATSConstants.STATUS, event.getVertexState().name());
+    return builder.build();
+  }
+
+  private HistoryEventProto convertVertexReconfigureDoneEvent(VertexConfigurationDoneEvent event) {
+    HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getReconfigureDoneTime(),
+        null, null, null, event.getVertexID(), null, null, null);
+    if (event.getSourceEdgeProperties() != null && !event.getSourceEdgeProperties().isEmpty()) {
+      Map<String, Object> updatedEdgeManagers = new HashMap<>();
+      for (Entry<String, EdgeProperty> entry : event.getSourceEdgeProperties().entrySet()) {
+        updatedEdgeManagers.put(entry.getKey(), DAGUtils.convertEdgeProperty(entry.getValue()));
+      }
+      addEventData(builder, ATSConstants.UPDATED_EDGE_MANAGERS, updatedEdgeManagers);
+    }
+    addEventData(builder, ATSConstants.NUM_TASKS, event.getNumTasks());
+    return builder.build();
+  }
+
+  private HistoryEventProto convertDAGRecoveredEvent(DAGRecoveredEvent event) {
+    HistoryEventProto.Builder builder = makeBuilderForEvent(event, event.getRecoveredTime(),
+        event.getDagID(), null, event.getApplicationAttemptId(), null, null, null,
+        event.getUser());
+    addEventData(builder, ATSConstants.DAG_NAME, event.getDagName());
+    if (event.getRecoveredDagState() != null) {
+      addEventData(builder, ATSConstants.DAG_STATE, event.getRecoveredDagState().name());
+    }
+    if (event.getRecoveryFailureReason() != null) {
+      addEventData(builder, ATSConstants.RECOVERY_FAILURE_REASON,
+          event.getRecoveryFailureReason());
+    }
+    addEventData(builder, ATSConstants.IN_PROGRESS_LOGS_URL + "_" +
event.getApplicationAttemptId().getAttemptId(), event.getContainerLogs());
+    return builder.build();
+  }
+}
diff --git a/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/HistoryEventProtoJsonConversion.java b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/HistoryEventProtoJsonConversion.java
new file mode 100644
index 0000000000..ef84b2ec61
--- /dev/null
+++ b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/HistoryEventProtoJsonConversion.java
@@ -0,0 +1,768 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.history.logging.proto;
+
+import java.util.Iterator;
+import java.util.Optional;
+
+import org.apache.tez.common.ATSConstants;
+import org.apache.tez.dag.history.HistoryEventType;
+import org.apache.tez.dag.history.logging.EntityTypes;
+import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.HistoryEventProto;
+import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.KVPair;
+import org.codehaus.jettison.json.JSONArray;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONObject;
+
+/**
+ * Convert HistoryEventProto into JSONObject for analyzers, which can already consume the output of
+ * SimpleHistoryLoggingService's JSONs. This class is based on HistoryEventJsonConversion, and all
+ * of the specific HistoryEvent calls were transformed into HistoryEventProto calls by taking the
+ * corresponding HistoryEventProtoConverter methods into consideration.
+ */ +public final class HistoryEventProtoJsonConversion { + + private HistoryEventProtoJsonConversion() { + } + + public static JSONObject convertToJson(HistoryEventProto historyEvent) throws JSONException { + JSONObject jsonObject = null; + + switch (historyEvent.getEventType()) { + case "APP_LAUNCHED": + jsonObject = convertAppLaunchedEvent(historyEvent); + break; + case "AM_LAUNCHED": + jsonObject = convertAMLaunchedEvent(historyEvent); + break; + case "AM_STARTED": + jsonObject = convertAMStartedEvent(historyEvent); + break; + case "CONTAINER_LAUNCHED": + jsonObject = convertContainerLaunchedEvent(historyEvent); + break; + case "CONTAINER_STOPPED": + jsonObject = convertContainerStoppedEvent(historyEvent); + break; + case "DAG_SUBMITTED": + jsonObject = convertDAGSubmittedEvent(historyEvent); + break; + case "DAG_INITIALIZED": + jsonObject = convertDAGInitializedEvent(historyEvent); + break; + case "DAG_STARTED": + jsonObject = convertDAGStartedEvent(historyEvent); + break; + case "DAG_FINISHED": + jsonObject = convertDAGFinishedEvent(historyEvent); + break; + case "VERTEX_INITIALIZED": + jsonObject = convertVertexInitializedEvent(historyEvent); + break; + case "VERTEX_STARTED": + jsonObject = convertVertexStartedEvent(historyEvent); + break; + case "VERTEX_FINISHED": + jsonObject = convertVertexFinishedEvent(historyEvent); + break; + case "TASK_STARTED": + jsonObject = convertTaskStartedEvent(historyEvent); + break; + case "TASK_FINISHED": + jsonObject = convertTaskFinishedEvent(historyEvent); + break; + case "TASK_ATTEMPT_STARTED": + jsonObject = convertTaskAttemptStartedEvent(historyEvent); + break; + case "TASK_ATTEMPT_FINISHED": + jsonObject = convertTaskAttemptFinishedEvent(historyEvent); + break; + case "VERTEX_CONFIGURE_DONE": + jsonObject = convertVertexReconfigureDoneEvent(historyEvent); + break; + case "DAG_RECOVERED": + jsonObject = convertDAGRecoveredEvent(historyEvent); + break; + case "VERTEX_COMMIT_STARTED": + case "VERTEX_GROUP_COMMIT_STARTED": + case "VERTEX_GROUP_COMMIT_FINISHED": + case "DAG_COMMIT_STARTED": + throw new UnsupportedOperationException( + "Invalid Event, does not support history" + ", eventType=" + historyEvent.getEventType()); + default: + throw new UnsupportedOperationException( + "Unhandled Event" + ", eventType=" + historyEvent.getEventType()); + } + return jsonObject; + } + + private static JSONObject convertDAGRecoveredEvent(HistoryEventProto event) throws JSONException { + JSONObject jsonObject = new JSONObject(); + jsonObject.put(ATSConstants.ENTITY, event.getDagId()); + jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name()); + + // Related Entities not needed as should have been done in + // dag submission event + + JSONArray events = new JSONArray(); + JSONObject recoverEvent = new JSONObject(); + recoverEvent.put(ATSConstants.TIMESTAMP, event.getEventTime()); + recoverEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.DAG_RECOVERED.name()); + + JSONObject recoverEventInfo = new JSONObject(); + recoverEventInfo.put(ATSConstants.APPLICATION_ATTEMPT_ID, event.getAppAttemptId().toString()); + recoverEventInfo.put(ATSConstants.DAG_STATE, getDataValueByKey(event, ATSConstants.DAG_STATE)); + recoverEventInfo.put(ATSConstants.RECOVERY_FAILURE_REASON, + getDataValueByKey(event, ATSConstants.RECOVERY_FAILURE_REASON)); + + recoverEvent.put(ATSConstants.EVENT_INFO, recoverEventInfo); + events.put(recoverEvent); + + jsonObject.put(ATSConstants.EVENTS, events); + + return jsonObject; + } + + private static JSONObject 
convertAppLaunchedEvent(HistoryEventProto event) throws JSONException { + JSONObject jsonObject = new JSONObject(); + jsonObject.put(ATSConstants.ENTITY, "tez_" + event.getAppId().toString()); + jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_APPLICATION.name()); + + // Other info to tag with Tez App + JSONObject otherInfo = new JSONObject(); + otherInfo.put(ATSConstants.USER, event.getUser()); + otherInfo.put(ATSConstants.CONFIG, new JSONObject()); // TODO: config from proto? + + jsonObject.put(ATSConstants.OTHER_INFO, otherInfo); + + return jsonObject; + } + + private static JSONObject convertAMLaunchedEvent(HistoryEventProto event) throws JSONException { + JSONObject jsonObject = new JSONObject(); + jsonObject.put(ATSConstants.ENTITY, "tez_" + event.getAppAttemptId().toString()); + jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_APPLICATION_ATTEMPT.name()); + + // Related Entities + JSONArray relatedEntities = new JSONArray(); + JSONObject appEntity = new JSONObject(); + appEntity.put(ATSConstants.ENTITY, event.getAppId().toString()); + appEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.APPLICATION_ID); + JSONObject appAttemptEntity = new JSONObject(); + appAttemptEntity.put(ATSConstants.ENTITY, event.getAppAttemptId().toString()); + appAttemptEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.APPLICATION_ATTEMPT_ID); + relatedEntities.put(appEntity); + relatedEntities.put(appAttemptEntity); + jsonObject.put(ATSConstants.RELATED_ENTITIES, relatedEntities); + + // TODO decide whether this goes into different events, + // event info or other info. + JSONArray events = new JSONArray(); + JSONObject initEvent = new JSONObject(); + initEvent.put(ATSConstants.TIMESTAMP, event.getEventTime()); + initEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.AM_LAUNCHED.name()); + events.put(initEvent); + jsonObject.put(ATSConstants.EVENTS, events); + + // Other info to tag with Tez AM + JSONObject otherInfo = new JSONObject(); + otherInfo.put(ATSConstants.APP_SUBMIT_TIME, + getDataValueByKey(event, ATSConstants.APP_SUBMIT_TIME)); + jsonObject.put(ATSConstants.OTHER_INFO, otherInfo); + + return jsonObject; + } + + private static JSONObject convertAMStartedEvent(HistoryEventProto event) throws JSONException { + JSONObject jsonObject = new JSONObject(); + jsonObject.put(ATSConstants.ENTITY, "tez_" + event.getAppAttemptId().toString()); + jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_APPLICATION_ATTEMPT.name()); + + // Related Entities + JSONArray relatedEntities = new JSONArray(); + JSONObject appEntity = new JSONObject(); + appEntity.put(ATSConstants.ENTITY, event.getAppId().toString()); + appEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.APPLICATION_ID); + JSONObject appAttemptEntity = new JSONObject(); + appAttemptEntity.put(ATSConstants.ENTITY, event.getAppAttemptId().toString()); + appAttemptEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.APPLICATION_ATTEMPT_ID); + relatedEntities.put(appEntity); + relatedEntities.put(appAttemptEntity); + jsonObject.put(ATSConstants.RELATED_ENTITIES, relatedEntities); + + // TODO decide whether this goes into different events, + // event info or other info. 
+ JSONArray events = new JSONArray(); + JSONObject startEvent = new JSONObject(); + startEvent.put(ATSConstants.TIMESTAMP, event.getEventTime()); + startEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.AM_STARTED.name()); + events.put(startEvent); + jsonObject.put(ATSConstants.EVENTS, events); + + return jsonObject; + } + + private static JSONObject convertContainerLaunchedEvent(HistoryEventProto event) + throws JSONException { + JSONObject jsonObject = new JSONObject(); + jsonObject.put(ATSConstants.ENTITY, + "tez_" + getDataValueByKey(event, ATSConstants.CONTAINER_ID)); + jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_CONTAINER_ID.name()); + + JSONArray relatedEntities = new JSONArray(); + JSONObject appAttemptEntity = new JSONObject(); + appAttemptEntity.put(ATSConstants.ENTITY, event.getAppAttemptId().toString()); + appAttemptEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_APPLICATION_ATTEMPT.name()); + + JSONObject containerEntity = new JSONObject(); + containerEntity.put(ATSConstants.ENTITY, getDataValueByKey(event, ATSConstants.CONTAINER_ID)); + containerEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.CONTAINER_ID); + + relatedEntities.put(appAttemptEntity); + relatedEntities.put(containerEntity); + jsonObject.put(ATSConstants.RELATED_ENTITIES, relatedEntities); + + // TODO decide whether this goes into different events, + // event info or other info. + JSONArray events = new JSONArray(); + JSONObject launchEvent = new JSONObject(); + launchEvent.put(ATSConstants.TIMESTAMP, event.getEventTime()); + launchEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.CONTAINER_LAUNCHED.name()); + events.put(launchEvent); + jsonObject.put(ATSConstants.EVENTS, events); + + // TODO add other container info here? or assume AHS will have this? + // TODO container logs? + + return jsonObject; + } + + private static JSONObject convertContainerStoppedEvent(HistoryEventProto event) + throws JSONException { + // structure is identical to ContainerLaunchedEvent + JSONObject jsonObject = new JSONObject(); + jsonObject.put(ATSConstants.ENTITY, + "tez_" + getDataValueByKey(event, ATSConstants.CONTAINER_ID)); + jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_CONTAINER_ID.name()); + + JSONArray relatedEntities = new JSONArray(); + JSONObject appAttemptEntity = new JSONObject(); + appAttemptEntity.put(ATSConstants.ENTITY, event.getAppAttemptId().toString()); + appAttemptEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_APPLICATION_ATTEMPT.name()); + + JSONObject containerEntity = new JSONObject(); + containerEntity.put(ATSConstants.ENTITY, getDataValueByKey(event, ATSConstants.CONTAINER_ID)); + containerEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.CONTAINER_ID); + + relatedEntities.put(appAttemptEntity); + relatedEntities.put(containerEntity); + jsonObject.put(ATSConstants.RELATED_ENTITIES, relatedEntities); + + // TODO decide whether this goes into different events, + // event info or other info. + JSONArray events = new JSONArray(); + JSONObject stopEvent = new JSONObject(); + stopEvent.put(ATSConstants.TIMESTAMP, event.getEventTime()); + stopEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.CONTAINER_STOPPED.name()); + events.put(stopEvent); + jsonObject.put(ATSConstants.EVENTS, events); + + // TODO add other container info here? or assume AHS will have this? + // TODO container logs? 
+
+    // Other info
+    JSONObject otherInfo = new JSONObject();
+    otherInfo.put(ATSConstants.EXIT_STATUS, getDataValueByKey(event, ATSConstants.EXIT_STATUS));
+    jsonObject.put(ATSConstants.OTHER_INFO, otherInfo);
+
+    return jsonObject;
+  }
+
+  private static JSONObject convertDAGFinishedEvent(HistoryEventProto event) throws JSONException {
+    JSONObject jsonObject = new JSONObject();
+    jsonObject.put(ATSConstants.ENTITY, event.getDagId());
+    jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name());
+
+    // Related Entities not needed as should have been done in
+    // dag submission event
+
+    // TODO decide whether this goes into different events,
+    // event info or other info.
+    JSONArray events = new JSONArray();
+    JSONObject finishEvent = new JSONObject();
+    finishEvent.put(ATSConstants.TIMESTAMP, event.getEventTime());
+    finishEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.DAG_FINISHED.name());
+    events.put(finishEvent);
+    jsonObject.put(ATSConstants.EVENTS, events);
+
+    JSONObject otherInfo = new JSONObject();
+
+    long startTime = getLongDataValueByKey(event, ATSConstants.START_TIME);
+
+    otherInfo.put(ATSConstants.START_TIME, startTime);
+    otherInfo.put(ATSConstants.FINISH_TIME, event.getEventTime());
+    otherInfo.put(ATSConstants.TIME_TAKEN, event.getEventTime() - startTime);
+    otherInfo.put(ATSConstants.STATUS, getDataValueByKey(event, ATSConstants.STATUS));
+    otherInfo.put(ATSConstants.DIAGNOSTICS, getDataValueByKey(event, ATSConstants.DIAGNOSTICS));
+    otherInfo.put(ATSConstants.COUNTERS, getJSONDataValueByKey(event, ATSConstants.COUNTERS));
+    otherInfo.put(ATSConstants.COMPLETION_APPLICATION_ATTEMPT_ID,
+        event.getAppAttemptId().toString());
+
+    // added all info to otherInfo in order to cover
+    // all key/value pairs added from event.getDagTaskStats()
+    Iterator<KVPair> it = event.getEventDataList().iterator();
+    while (it.hasNext()) {
+      KVPair pair = it.next();
+      otherInfo.put(pair.getKey(), pair.getValue());
+    }
+
+    jsonObject.put(ATSConstants.OTHER_INFO, otherInfo);
+
+    return jsonObject;
+  }
+
+  private static JSONObject convertDAGInitializedEvent(HistoryEventProto event)
+      throws JSONException {
+    JSONObject jsonObject = new JSONObject();
+    jsonObject.put(ATSConstants.ENTITY, event.getDagId());
+    jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name());
+
+    // Related Entities not needed as should have been done in
+    // dag submission event
+
+    JSONArray events = new JSONArray();
+    JSONObject initEvent = new JSONObject();
+    initEvent.put(ATSConstants.TIMESTAMP, event.getEventTime());
+    initEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.DAG_INITIALIZED.name());
+    events.put(initEvent);
+    jsonObject.put(ATSConstants.EVENTS, events);
+
+    JSONObject otherInfo = new JSONObject();
+    otherInfo.put(ATSConstants.VERTEX_NAME_ID_MAPPING,
+        getJSONDataValueByKey(event, ATSConstants.VERTEX_NAME_ID_MAPPING));
+    jsonObject.put(ATSConstants.OTHER_INFO, otherInfo);
+
+    return jsonObject;
+  }
+
+  private static JSONObject convertDAGStartedEvent(HistoryEventProto event) throws JSONException {
+    JSONObject jsonObject = new JSONObject();
+    jsonObject.put(ATSConstants.ENTITY, event.getDagId());
+    jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name());
+
+    // Related Entities not needed as should have been done in
+    // dag submission event
+
+    // TODO decide whether this goes into different events,
+    // event info or other info.
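As a point of reference for the lookups used throughout these methods, each HistoryEventProto carries its details as a repeated list of KVPair entries rather than typed fields. A minimal sketch (illustrative only, not part of the patch; the id literal is hypothetical, and the builder methods are the ordinary protobuf-generated API for the messages this patch adds):

```java
// Build an event the way HistoryEventProtoConverter does, then query it the
// way the helpers at the bottom of this class do.
HistoryEventProto dagStarted = HistoryEventProto.newBuilder()
    .setEventType(HistoryEventType.DAG_STARTED.name())
    .setEventTime(System.currentTimeMillis())
    .setDagId("dag_1_0001_1")  // hypothetical id, for illustration only
    .addEventData(KVPair.newBuilder()
        .setKey(ATSConstants.STATUS)
        .setValue("RUNNING")
        .build())
    .build();

// Equivalent to getDataValueByKey(dagStarted, ATSConstants.STATUS) below:
// scan the repeated event_data field for the first matching key.
String status = dagStarted.getEventDataList().stream()
    .filter(p -> p.getKey().equals(ATSConstants.STATUS))
    .map(KVPair::getValue)
    .findAny()
    .orElse(null);
```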
+ JSONArray events = new JSONArray(); + JSONObject startEvent = new JSONObject(); + startEvent.put(ATSConstants.TIMESTAMP, event.getEventTime()); + startEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.DAG_STARTED.name()); + events.put(startEvent); + jsonObject.put(ATSConstants.EVENTS, events); + + return jsonObject; + } + + private static JSONObject convertDAGSubmittedEvent(HistoryEventProto event) throws JSONException { + JSONObject jsonObject = new JSONObject(); + jsonObject.put(ATSConstants.ENTITY, event.getDagId()); + jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name()); + + // Related Entities + JSONArray relatedEntities = new JSONArray(); + JSONObject tezAppEntity = new JSONObject(); + tezAppEntity.put(ATSConstants.ENTITY, "tez_" + event.getAppId().toString()); + tezAppEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_APPLICATION.name()); + JSONObject tezAppAttemptEntity = new JSONObject(); + tezAppAttemptEntity.put(ATSConstants.ENTITY, "tez_" + event.getAppAttemptId().toString()); + tezAppAttemptEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_APPLICATION_ATTEMPT.name()); + JSONObject appEntity = new JSONObject(); + appEntity.put(ATSConstants.ENTITY, event.getAppId().toString()); + appEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.APPLICATION_ID); + JSONObject appAttemptEntity = new JSONObject(); + appAttemptEntity.put(ATSConstants.ENTITY, event.getAppAttemptId().toString()); + appAttemptEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.APPLICATION_ATTEMPT_ID); + JSONObject userEntity = new JSONObject(); + userEntity.put(ATSConstants.ENTITY, event.getUser()); + userEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.USER); + + relatedEntities.put(tezAppEntity); + relatedEntities.put(tezAppAttemptEntity); + relatedEntities.put(appEntity); + relatedEntities.put(appAttemptEntity); + relatedEntities.put(userEntity); + jsonObject.put(ATSConstants.RELATED_ENTITIES, relatedEntities); + + // filters + JSONObject primaryFilters = new JSONObject(); + primaryFilters.put(ATSConstants.DAG_NAME, getDataValueByKey(event, ATSConstants.DAG_NAME)); + primaryFilters.put(ATSConstants.CALLER_CONTEXT_ID, + getDataValueByKey(event, ATSConstants.CALLER_CONTEXT_ID)); + primaryFilters.put(ATSConstants.CALLER_CONTEXT_TYPE, + getDataValueByKey(event, ATSConstants.CALLER_CONTEXT_TYPE)); + primaryFilters.put(ATSConstants.DAG_QUEUE_NAME, + getDataValueByKey(event, ATSConstants.DAG_QUEUE_NAME)); + + jsonObject.put(ATSConstants.PRIMARY_FILTERS, primaryFilters); + + // TODO decide whether this goes into different events, + // event info or other info. 
+ JSONArray events = new JSONArray(); + JSONObject submitEvent = new JSONObject(); + submitEvent.put(ATSConstants.TIMESTAMP, event.getEventTime()); + submitEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.DAG_SUBMITTED.name()); + events.put(submitEvent); + jsonObject.put(ATSConstants.EVENTS, events); + + // Other info such as dag plan + JSONObject otherInfo = new JSONObject(); + otherInfo.put(ATSConstants.DAG_PLAN, getJSONDataValueByKey(event, ATSConstants.DAG_PLAN)); + + otherInfo.put(ATSConstants.CALLER_CONTEXT_ID, + getDataValueByKey(event, ATSConstants.CALLER_CONTEXT_ID)); + otherInfo.put(ATSConstants.CALLER_CONTEXT_TYPE, + getDataValueByKey(event, ATSConstants.CALLER_CONTEXT_TYPE)); + otherInfo.put(ATSConstants.DAG_QUEUE_NAME, + getDataValueByKey(event, ATSConstants.DAG_QUEUE_NAME)); + + jsonObject.put(ATSConstants.OTHER_INFO, otherInfo); + + return jsonObject; + } + + private static JSONObject convertTaskAttemptFinishedEvent(HistoryEventProto event) + throws JSONException { + JSONObject jsonObject = new JSONObject(); + jsonObject.put(ATSConstants.ENTITY, event.getTaskAttemptId()); + jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_TASK_ATTEMPT_ID.name()); + + // Events + JSONArray events = new JSONArray(); + JSONObject finishEvent = new JSONObject(); + finishEvent.put(ATSConstants.TIMESTAMP, event.getEventTime()); + finishEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.TASK_ATTEMPT_FINISHED.name()); + events.put(finishEvent); + jsonObject.put(ATSConstants.EVENTS, events); + + JSONObject otherInfo = new JSONObject(); + long startTime = getLongDataValueByKey(event, ATSConstants.START_TIME); + + otherInfo.put(ATSConstants.CREATION_TIME, getDataValueByKey(event, ATSConstants.CREATION_TIME)); + otherInfo.put(ATSConstants.ALLOCATION_TIME, + getDataValueByKey(event, ATSConstants.ALLOCATION_TIME)); + otherInfo.put(ATSConstants.START_TIME, startTime); + otherInfo.put(ATSConstants.FINISH_TIME, event.getEventTime()); + otherInfo.put(ATSConstants.TIME_TAKEN, event.getEventTime() - startTime); + + otherInfo.put(ATSConstants.CREATION_CAUSAL_ATTEMPT, + getDataValueByKey(event, ATSConstants.CREATION_CAUSAL_ATTEMPT)); + otherInfo.put(ATSConstants.STATUS, getDataValueByKey(event, ATSConstants.STATUS)); + + otherInfo.put(ATSConstants.STATUS, getDataValueByKey(event, ATSConstants.STATUS)); + otherInfo.put(ATSConstants.TASK_ATTEMPT_ERROR_ENUM, + getDataValueByKey(event, ATSConstants.TASK_ATTEMPT_ERROR_ENUM)); + otherInfo.put(ATSConstants.TASK_FAILURE_TYPE, + getDataValueByKey(event, ATSConstants.TASK_FAILURE_TYPE)); + otherInfo.put(ATSConstants.DIAGNOSTICS, getDataValueByKey(event, ATSConstants.DIAGNOSTICS)); + otherInfo.put(ATSConstants.COUNTERS, getJSONDataValueByKey(event, ATSConstants.COUNTERS)); + otherInfo.put(ATSConstants.LAST_DATA_EVENTS, + getJSONDataValueByKey(event, ATSConstants.LAST_DATA_EVENTS)); + otherInfo.put(ATSConstants.NODE_ID, getDataValueByKey(event, ATSConstants.NODE_ID)); + otherInfo.put(ATSConstants.CONTAINER_ID, getDataValueByKey(event, ATSConstants.CONTAINER_ID)); + otherInfo.put(ATSConstants.IN_PROGRESS_LOGS_URL, + getDataValueByKey(event, ATSConstants.IN_PROGRESS_LOGS_URL)); + otherInfo.put(ATSConstants.COMPLETED_LOGS_URL, + getDataValueByKey(event, ATSConstants.COMPLETED_LOGS_URL)); + otherInfo.put(ATSConstants.NODE_HTTP_ADDRESS, + getDataValueByKey(event, ATSConstants.NODE_HTTP_ADDRESS)); + + jsonObject.put(ATSConstants.OTHER_INFO, otherInfo); + + return jsonObject; + } + + private static JSONObject convertTaskAttemptStartedEvent(HistoryEventProto event) + 
throws JSONException { + JSONObject jsonObject = new JSONObject(); + jsonObject.put(ATSConstants.ENTITY, event.getTaskAttemptId()); + jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_TASK_ATTEMPT_ID.name()); + + // Related entities + JSONArray relatedEntities = new JSONArray(); + JSONObject nodeEntity = new JSONObject(); + nodeEntity.put(ATSConstants.ENTITY, getDataValueByKey(event, ATSConstants.NODE_ID)); + nodeEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.NODE_ID); + + JSONObject containerEntity = new JSONObject(); + containerEntity.put(ATSConstants.ENTITY, getDataValueByKey(event, ATSConstants.CONTAINER_ID)); + containerEntity.put(ATSConstants.ENTITY_TYPE, ATSConstants.CONTAINER_ID); + + JSONObject taskEntity = new JSONObject(); + taskEntity.put(ATSConstants.ENTITY, event.getTaskAttemptId()); + taskEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_TASK_ID.name()); + + relatedEntities.put(nodeEntity); + relatedEntities.put(containerEntity); + relatedEntities.put(taskEntity); + jsonObject.put(ATSConstants.RELATED_ENTITIES, relatedEntities); + + // Events + JSONArray events = new JSONArray(); + JSONObject startEvent = new JSONObject(); + startEvent.put(ATSConstants.TIMESTAMP, event.getEventTime()); + startEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.TASK_ATTEMPT_STARTED.name()); + events.put(startEvent); + jsonObject.put(ATSConstants.EVENTS, events); + + // Other info + JSONObject otherInfo = new JSONObject(); + otherInfo.put(ATSConstants.IN_PROGRESS_LOGS_URL, + getDataValueByKey(event, ATSConstants.IN_PROGRESS_LOGS_URL)); + otherInfo.put(ATSConstants.COMPLETED_LOGS_URL, + getDataValueByKey(event, ATSConstants.COMPLETED_LOGS_URL)); + jsonObject.put(ATSConstants.OTHER_INFO, otherInfo); + + return jsonObject; + } + + private static JSONObject convertTaskFinishedEvent(HistoryEventProto event) throws JSONException { + JSONObject jsonObject = new JSONObject(); + jsonObject.put(ATSConstants.ENTITY, event.getTaskId()); + jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_TASK_ID.name()); + + // Events + JSONArray events = new JSONArray(); + JSONObject finishEvent = new JSONObject(); + finishEvent.put(ATSConstants.TIMESTAMP, event.getEventTime()); + finishEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.TASK_FINISHED.name()); + events.put(finishEvent); + jsonObject.put(ATSConstants.EVENTS, events); + + long timeTaken = getLongDataValueByKey(event, ATSConstants.TIME_TAKEN); + + JSONObject otherInfo = new JSONObject(); + otherInfo.put(ATSConstants.START_TIME, event.getEventTime() - timeTaken); + otherInfo.put(ATSConstants.FINISH_TIME, event.getEventTime()); + otherInfo.put(ATSConstants.TIME_TAKEN, timeTaken); + + otherInfo.put(ATSConstants.STATUS, getDataValueByKey(event, ATSConstants.STATUS)); + otherInfo.put(ATSConstants.DIAGNOSTICS, getDataValueByKey(event, ATSConstants.DIAGNOSTICS)); + otherInfo.put(ATSConstants.COUNTERS, getJSONDataValueByKey(event, ATSConstants.COUNTERS)); + otherInfo.put(ATSConstants.SUCCESSFUL_ATTEMPT_ID, + getDataValueByKey(event, ATSConstants.SUCCESSFUL_ATTEMPT_ID)); + + jsonObject.put(ATSConstants.OTHER_INFO, otherInfo); + + return jsonObject; + } + + private static JSONObject convertTaskStartedEvent(HistoryEventProto event) throws JSONException { + JSONObject jsonObject = new JSONObject(); + jsonObject.put(ATSConstants.ENTITY, event.getTaskId()); + jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_TASK_ID.name()); + + // Related entities + JSONArray relatedEntities = new JSONArray(); + JSONObject vertexEntity = new JSONObject(); + 
vertexEntity.put(ATSConstants.ENTITY, event.getVertexId());
+    vertexEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_VERTEX_ID.name());
+    relatedEntities.put(vertexEntity);
+    jsonObject.put(ATSConstants.RELATED_ENTITIES, relatedEntities);
+
+    // Events
+    JSONArray events = new JSONArray();
+    JSONObject startEvent = new JSONObject();
+    startEvent.put(ATSConstants.TIMESTAMP, event.getEventTime());
+    startEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.TASK_STARTED.name());
+    events.put(startEvent);
+    jsonObject.put(ATSConstants.EVENTS, events);
+
+    // Other info
+    // TODO fix schedule/launch time to be events
+    JSONObject otherInfo = new JSONObject();
+    otherInfo.put(ATSConstants.START_TIME, event.getEventTime());
+    otherInfo.put(ATSConstants.SCHEDULED_TIME,
+        getDataValueByKey(event, ATSConstants.SCHEDULED_TIME));
+    jsonObject.put(ATSConstants.OTHER_INFO, otherInfo);
+
+    return jsonObject;
+  }
+
+  private static JSONObject convertVertexFinishedEvent(HistoryEventProto event)
+      throws JSONException {
+    JSONObject jsonObject = new JSONObject();
+    jsonObject.put(ATSConstants.ENTITY, event.getVertexId());
+    jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_VERTEX_ID.name());
+
+    // Events
+    JSONArray events = new JSONArray();
+    JSONObject finishEvent = new JSONObject();
+    finishEvent.put(ATSConstants.TIMESTAMP, event.getEventTime());
+    finishEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.VERTEX_FINISHED.name());
+    events.put(finishEvent);
+    jsonObject.put(ATSConstants.EVENTS, events);
+
+    long timeTaken = getLongDataValueByKey(event, ATSConstants.TIME_TAKEN);
+
+    JSONObject otherInfo = new JSONObject();
+    otherInfo.put(ATSConstants.START_TIME, event.getEventTime() - timeTaken);
+    otherInfo.put(ATSConstants.FINISH_TIME, event.getEventTime());
+    otherInfo.put(ATSConstants.TIME_TAKEN, timeTaken);
+
+    otherInfo.put(ATSConstants.STATUS, getDataValueByKey(event, ATSConstants.STATUS));
+    otherInfo.put(ATSConstants.DIAGNOSTICS, getDataValueByKey(event, ATSConstants.DIAGNOSTICS));
+    otherInfo.put(ATSConstants.COUNTERS, getJSONDataValueByKey(event, ATSConstants.COUNTERS));
+
+    otherInfo.put(ATSConstants.STATS, getJSONDataValueByKey(event, ATSConstants.STATS));
+
+    // added all info to otherInfo in order to cover
+    // all key/value pairs added from event.getVertexTaskStats()
+    Iterator<KVPair> it = event.getEventDataList().iterator();
+    while (it.hasNext()) {
+      KVPair pair = it.next();
+      otherInfo.put(pair.getKey(), pair.getValue());
+    }
+
+    otherInfo.put(ATSConstants.SERVICE_PLUGIN,
+        getJSONDataValueByKey(event, ATSConstants.SERVICE_PLUGIN));
+
+    jsonObject.put(ATSConstants.OTHER_INFO, otherInfo);
+
+    return jsonObject;
+  }
+
+  private static JSONObject convertVertexReconfigureDoneEvent(HistoryEventProto event)
+      throws JSONException {
+    JSONObject jsonObject = new JSONObject();
+    jsonObject.put(ATSConstants.ENTITY, event.getVertexId());
+    jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_VERTEX_ID.name());
+
+    // Events
+    JSONArray events = new JSONArray();
+    JSONObject updateEvent = new JSONObject();
+    updateEvent.put(ATSConstants.TIMESTAMP, event.getEventTime());
+    updateEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.VERTEX_CONFIGURE_DONE.name());
+
+    JSONObject eventInfo = new JSONObject();
+    eventInfo.put(ATSConstants.NUM_TASKS, getDataValueByKey(event, ATSConstants.NUM_TASKS));
+    eventInfo.put(ATSConstants.UPDATED_EDGE_MANAGERS,
+        getJSONDataValueByKey(event, ATSConstants.UPDATED_EDGE_MANAGERS));
+    updateEvent.put(ATSConstants.EVENT_INFO, eventInfo);
+
events.put(updateEvent);
+    jsonObject.put(ATSConstants.EVENTS, events);
+
+    // Other info
+    JSONObject otherInfo = new JSONObject();
+    jsonObject.put(ATSConstants.OTHER_INFO, otherInfo);
+
+    // TODO add more on all other updated information
+    return jsonObject;
+  }
+
+  private static JSONObject convertVertexInitializedEvent(HistoryEventProto event)
+      throws JSONException {
+    JSONObject jsonObject = new JSONObject();
+    jsonObject.put(ATSConstants.ENTITY, event.getVertexId());
+    jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_VERTEX_ID.name());
+
+    // Related entities
+    JSONArray relatedEntities = new JSONArray();
+    JSONObject vertexEntity = new JSONObject();
+    vertexEntity.put(ATSConstants.ENTITY, event.getDagId());
+    vertexEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name());
+    relatedEntities.put(vertexEntity);
+    jsonObject.put(ATSConstants.RELATED_ENTITIES, relatedEntities);
+
+    // Events
+    JSONArray events = new JSONArray();
+    JSONObject initEvent = new JSONObject();
+    initEvent.put(ATSConstants.TIMESTAMP, event.getEventTime());
+    initEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.VERTEX_INITIALIZED.name());
+    events.put(initEvent);
+    jsonObject.put(ATSConstants.EVENTS, events);
+
+    // Other info
+    // TODO fix requested times to be events
+    JSONObject otherInfo = new JSONObject();
+    otherInfo.put(ATSConstants.VERTEX_NAME, getDataValueByKey(event, ATSConstants.VERTEX_NAME));
+    otherInfo.put(ATSConstants.INIT_REQUESTED_TIME,
+        getDataValueByKey(event, ATSConstants.INIT_REQUESTED_TIME));
+    otherInfo.put(ATSConstants.INIT_TIME, getDataValueByKey(event, ATSConstants.INIT_TIME));
+    otherInfo.put(ATSConstants.NUM_TASKS, getDataValueByKey(event, ATSConstants.NUM_TASKS));
+    otherInfo.put(ATSConstants.PROCESSOR_CLASS_NAME,
+        getDataValueByKey(event, ATSConstants.PROCESSOR_CLASS_NAME));
+    otherInfo.put(ATSConstants.SERVICE_PLUGIN,
+        getJSONDataValueByKey(event, ATSConstants.SERVICE_PLUGIN));
+    jsonObject.put(ATSConstants.OTHER_INFO, otherInfo);
+
+    return jsonObject;
+  }
+
+  private static JSONObject convertVertexStartedEvent(HistoryEventProto event)
+      throws JSONException {
+    JSONObject jsonObject = new JSONObject();
+    jsonObject.put(ATSConstants.ENTITY, event.getVertexId());
+    jsonObject.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_VERTEX_ID.name());
+
+    // Related entities
+    JSONArray relatedEntities = new JSONArray();
+    JSONObject vertexEntity = new JSONObject();
+    vertexEntity.put(ATSConstants.ENTITY, event.getDagId());
+    vertexEntity.put(ATSConstants.ENTITY_TYPE, EntityTypes.TEZ_DAG_ID.name());
+    relatedEntities.put(vertexEntity);
+    jsonObject.put(ATSConstants.RELATED_ENTITIES, relatedEntities);
+
+    // Events
+    JSONArray events = new JSONArray();
+    JSONObject startEvent = new JSONObject();
+    startEvent.put(ATSConstants.TIMESTAMP, event.getEventTime());
+    startEvent.put(ATSConstants.EVENT_TYPE, HistoryEventType.VERTEX_STARTED.name());
+    events.put(startEvent);
+    jsonObject.put(ATSConstants.EVENTS, events);
+
+    // Other info
+    // TODO fix requested times to be events
+    JSONObject otherInfo = new JSONObject();
+    otherInfo.put(ATSConstants.START_REQUESTED_TIME,
+        getDataValueByKey(event, ATSConstants.START_REQUESTED_TIME));
+    otherInfo.put(ATSConstants.START_TIME, event.getEventTime());
+    jsonObject.put(ATSConstants.OTHER_INFO, otherInfo);
+
+    return jsonObject;
+  }
+
+  private static String getDataValueByKey(HistoryEventProto event, String key) {
+    Optional<KVPair> pair =
+        event.getEventDataList().stream().filter(p -> p.getKey().equals(key)).findAny();
+    return
pair.isPresent() ? pair.get().getValue() : null; + } + + private static long getLongDataValueByKey(HistoryEventProto event, String key) { + String value = getDataValueByKey(event, key); + return (value == null || value.isEmpty()) ? 0 : Long.parseLong(value); + } + + private static JSONObject getJSONDataValueByKey(HistoryEventProto event, String key) + throws JSONException { + String value = getDataValueByKey(event, key); + return (value == null || value.isEmpty()) ? null : new JSONObject(value); + } +} diff --git a/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoHistoryLoggingService.java b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoHistoryLoggingService.java new file mode 100644 index 0000000000..d675a659a0 --- /dev/null +++ b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoHistoryLoggingService.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.history.logging.proto;
+
+import java.io.IOException;
+import java.time.LocalDate;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.history.DAGHistoryEvent;
+import org.apache.tez.dag.history.HistoryEvent;
+import org.apache.tez.dag.history.HistoryEventType;
+import org.apache.tez.dag.history.events.DAGFinishedEvent;
+import org.apache.tez.dag.history.logging.HistoryLoggingService;
+import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.HistoryEventProto;
+import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.ManifestEntryProto;
+import org.apache.tez.dag.records.TezDAGID;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Logging service to write history events serialized using protobuf into sequence files.
+ * This can be used as external tables in hive. Or the reader can be used independently to
+ * read the data from these files.
+ */
+public class ProtoHistoryLoggingService extends HistoryLoggingService {
+  private static final Logger LOG = LoggerFactory.getLogger(ProtoHistoryLoggingService.class);
+  // The file suffix used if we are writing start events and rest into different files.
+  static final String SPLIT_DAG_EVENTS_FILE_SUFFIX = "_1";
+
+  private final HistoryEventProtoConverter converter =
+      new HistoryEventProtoConverter();
+  private boolean loggingDisabled = false;
+
+  private LinkedBlockingQueue<DAGHistoryEvent> eventQueue;
+  private Thread eventHandlingThread;
+  private final AtomicBoolean stopped = new AtomicBoolean(false);
+
+  private TezProtoLoggers loggers;
+  private ProtoMessageWriter<HistoryEventProto> appEventsWriter;
+  private ProtoMessageWriter<HistoryEventProto> dagEventsWriter;
+  private ProtoMessageWriter<ManifestEntryProto> manifestEventsWriter;
+  private LocalDate manifestDate;
+  private TezDAGID currentDagId;
+  private long dagSubmittedEventOffset = -1;
+
+  private String appEventsFile;
+  private long appLaunchedEventOffset;
+  private boolean splitDagStartEvents;
+
+  public ProtoHistoryLoggingService() {
+    super(ProtoHistoryLoggingService.class.getName());
+  }
+
+  @Override
+  protected void serviceInit(Configuration conf) {
+    LOG.info("Initing ProtoHistoryLoggingService");
+    setConfig(conf);
+    loggingDisabled = !conf.getBoolean(TezConfiguration.TEZ_AM_HISTORY_LOGGING_ENABLED,
+        TezConfiguration.TEZ_AM_HISTORY_LOGGING_ENABLED_DEFAULT);
+    splitDagStartEvents = conf.getBoolean(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_SPLIT_DAG_START,
+        TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_SPLIT_DAG_START_DEFAULT);
+    final int queueSize = conf.getInt(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_QUEUE_SIZE,
+        TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_QUEUE_SIZE_DEFAULT);
+    eventQueue = new LinkedBlockingQueue<>(queueSize);
+    LOG.info("Inited ProtoHistoryLoggingService. loggingDisabled: {} splitDagStartEvents: {} queueSize: {}",
+        loggingDisabled, splitDagStartEvents, queueSize);
+  }
+
+  @Override
+  protected void serviceStart() throws Exception {
+    LOG.info("Starting ProtoHistoryLoggingService");
+    if (!loggingDisabled) {
+      loggers = new TezProtoLoggers();
+      if (!loggers.setup(getConfig(), appContext.getClock())) {
+        LOG.warn("Log file location for ProtoHistoryLoggingService not specified, " +
+            "logging disabled");
+        loggingDisabled = true;
+        return;
+      }
+      appEventsWriter = loggers.getAppEventsLogger().getWriter(
+          appContext.getApplicationAttemptId().toString());
+      eventHandlingThread = new Thread(this::loop, "HistoryEventHandlingThread");
+      eventHandlingThread.start();
+    }
+    LOG.info("Started ProtoHistoryLoggingService");
+  }
+
+  @Override
+  protected void serviceStop() throws Exception {
+    LOG.info("Stopping ProtoHistoryLoggingService, eventQueueBacklog=" + eventQueue.size());
+    stopped.set(true);
+    eventHandlingThread.join();
+    IOUtils.closeQuietly(appEventsWriter);
+    IOUtils.closeQuietly(dagEventsWriter);
+    IOUtils.closeQuietly(manifestEventsWriter);
+    LOG.info("Stopped ProtoHistoryLoggingService");
+  }
+
+  @Override
+  public void handle(DAGHistoryEvent event) {
+    if (loggingDisabled || stopped.get()) {
+      return;
+    }
+    try {
+      eventQueue.add(event);
+    } catch (IllegalStateException e) {
+      LOG.error("Queue capacity filled up, ignoring event: " +
+          event.getHistoryEvent().getEventType());
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Queue capacity filled up, ignoring event: {}", event.getHistoryEvent());
+      }
+    }
+  }
+
+  private void loop() {
+    // Keep looping while the service is not stopped.
+    // Drain any left over events after the service has been stopped.
+    while (!stopped.get() || !eventQueue.isEmpty()) {
+      DAGHistoryEvent evt = null;
+      try {
+        evt = eventQueue.poll(100, TimeUnit.MILLISECONDS);
+        if (evt != null) {
+          handleEvent(evt);
+        }
+      } catch (InterruptedException e) {
+        LOG.info("EventQueue poll interrupted, ignoring it.", e);
+      } catch (IOException e) {
+        TezDAGID dagid = evt.getDAGID();
+        HistoryEventType type = evt.getHistoryEvent().getEventType();
+        // Retry is hard, because there are several places where this exception can happen, and
+        // the state will get messed up a lot.
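For orientation, this is roughly how the service gets switched on from a client configuration; a sketch only, not part of the patch. The proto-specific keys are exactly the ones serviceInit() reads above; TEZ_HISTORY_LOGGING_SERVICE_CLASS is assumed to be the existing logger-selection knob in TezConfiguration, and the base-dir path is a placeholder.

```java
TezConfiguration conf = new TezConfiguration();
// Select this logging service implementation (assumed existing selection knob).
conf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
    "org.apache.tez.dag.history.logging.proto.ProtoHistoryLoggingService");
// Root directory under which app_data/dag_data/dag_meta are created
// by TezProtoLoggers; placeholder path.
conf.set(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_BASE_DIR,
    "hdfs:///warehouse/tez/history");
// Optional knobs read in serviceInit() above.
conf.setBoolean(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_SPLIT_DAG_START, true);
conf.setInt(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_QUEUE_SIZE, 100000);
```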
+        LOG.error("Got exception while handling event {} for dag {}.", type, dagid, e);
+      }
+    }
+  }
+
+  private void handleEvent(DAGHistoryEvent event) throws IOException {
+    if (loggingDisabled) {
+      return;
+    }
+    HistoryEvent historyEvent = event.getHistoryEvent();
+    if (event.getDAGID() == null) {
+      if (historyEvent.getEventType() == HistoryEventType.APP_LAUNCHED) {
+        appEventsFile = appEventsWriter.getPath().toString();
+        appLaunchedEventOffset = appEventsWriter.getOffset();
+      }
+      appEventsWriter.writeProto(converter.convert(historyEvent));
+    } else {
+      HistoryEventType type = historyEvent.getEventType();
+      TezDAGID dagId = event.getDAGID();
+      if (type == HistoryEventType.DAG_FINISHED) {
+        finishCurrentDag((DAGFinishedEvent)historyEvent);
+      } else if (type == HistoryEventType.DAG_SUBMITTED) {
+        finishCurrentDag(null);
+        currentDagId = dagId;
+        dagEventsWriter = loggers.getDagEventsLogger().getWriter(dagId.toString() +
+            "_" + appContext.getApplicationAttemptId().getAttemptId());
+        dagSubmittedEventOffset = dagEventsWriter.getOffset();
+        dagEventsWriter.writeProto(converter.convert(historyEvent));
+      } else if (dagEventsWriter != null) {
+        dagEventsWriter.writeProto(converter.convert(historyEvent));
+        if (splitDagStartEvents && type == HistoryEventType.DAG_STARTED) {
+          // Close the file and write submitted event offset into manifest.
+          finishCurrentDag(null);
+          dagEventsWriter = loggers.getDagEventsLogger().getWriter(dagId.toString() +
+              "_" + appContext.getApplicationAttemptId().getAttemptId() +
+              SPLIT_DAG_EVENTS_FILE_SUFFIX);
+        }
+      }
+    }
+  }
+
+  private void finishCurrentDag(DAGFinishedEvent event) throws IOException {
+    if (dagEventsWriter == null) {
+      return;
+    }
+    try {
+      long finishEventOffset = -1;
+      if (event != null) {
+        finishEventOffset = dagEventsWriter.getOffset();
+        dagEventsWriter.writeProto(converter.convert(event));
+      }
+      DatePartitionedLogger<ManifestEntryProto> manifestLogger = loggers.getManifestEventsLogger();
+      if (manifestDate == null || !manifestDate.equals(manifestLogger.getNow().toLocalDate())) {
+        // The day has changed, write to a new file.
+        IOUtils.closeQuietly(manifestEventsWriter);
+        manifestEventsWriter = manifestLogger.getWriter(
+            appContext.getApplicationAttemptId().toString());
+        manifestDate = manifestLogger.getDateFromDir(
+            manifestEventsWriter.getPath().getParent().getName());
+      }
+      ManifestEntryProto.Builder entry = ManifestEntryProto.newBuilder()
+          .setDagId(currentDagId.toString())
+          .setAppId(currentDagId.getApplicationId().toString())
+          .setDagSubmittedEventOffset(dagSubmittedEventOffset)
+          .setDagFinishedEventOffset(finishEventOffset)
+          .setDagFilePath(dagEventsWriter.getPath().toString())
+          .setAppFilePath(appEventsFile)
+          .setAppLaunchedEventOffset(appLaunchedEventOffset)
+          .setWriteTime(System.currentTimeMillis());
+      if (event != null) {
+        entry.setDagId(event.getDAGID().toString());
+      }
+      manifestEventsWriter.writeProto(entry.build());
+      manifestEventsWriter.hflush();
+      appEventsWriter.hflush();
+    } finally {
+      // On an error, clean up everything; this ensures we do not use one dag's writer
+      // for another dag.
+ IOUtils.closeQuietly(dagEventsWriter); + dagEventsWriter = null; + dagSubmittedEventOffset = -1; + } + } +} diff --git a/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageReader.java b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageReader.java new file mode 100644 index 0000000000..2cac4d8764 --- /dev/null +++ b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageReader.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.history.logging.proto;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile.Reader;
+
+import com.google.protobuf.MessageLite;
+import com.google.protobuf.Parser;
+
+public class ProtoMessageReader<T extends MessageLite> implements Closeable {
+  private final Path filePath;
+  private final Reader reader;
+  private final ProtoMessageWritable<T> writable;
+
+  public ProtoMessageReader(Configuration conf, Path filePath, Parser<T> parser)
+      throws IOException {
+    this.filePath = filePath;
+    // The writer does not flush the length during hflush. Using the length option lets us read
+    // past the length in the FileStatus, but it will throw EOFException during a read instead
+    // of returning null.
+    this.reader = new Reader(conf, Reader.file(filePath), Reader.length(Long.MAX_VALUE));
+    this.writable = new ProtoMessageWritable<>(parser);
+  }
+
+  public Path getFilePath() {
+    return filePath;
+  }
+
+  public void setOffset(long offset) throws IOException {
+    reader.seek(offset);
+  }
+
+  public long getOffset() throws IOException {
+    return reader.getPosition();
+  }
+
+  public T readEvent() throws IOException {
+    if (!reader.next(NullWritable.get(), writable)) {
+      return null;
+    }
+    return writable.getMessage();
+  }
+
+  @Override
+  public void close() throws IOException {
+    reader.close();
+  }
+}
diff --git a/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageWritable.java b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageWritable.java
new file mode 100644
index 0000000000..63a1ebda08
--- /dev/null
+++ b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageWritable.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.history.logging.proto;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import com.google.protobuf.ExtensionRegistry;
+import org.apache.hadoop.io.Writable;
+
+import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.CodedOutputStream;
+import com.google.protobuf.MessageLite;
+import com.google.protobuf.Parser;
+
+public class ProtoMessageWritable<T extends MessageLite> implements Writable {
+  private T message;
+  private final Parser<T> parser;
+  private DataOutputStream dos;
+  private CodedOutputStream cos;
+  private DataInputStream din;
+  private CodedInputStream cin;
+
+  ProtoMessageWritable(Parser<T> parser) {
+    this.parser = parser;
+  }
+
+  public T getMessage() {
+    return message;
+  }
+
+  public void setMessage(T message) {
+    this.message = message;
+  }
+
+  private static class DataOutputStream extends OutputStream {
+    DataOutput out;
+    @Override
+    public void write(int b) throws IOException {
+      out.write(b);
+    }
+
+    @Override
+    public void write(byte b[], int off, int len) throws IOException {
+      out.write(b, off, len);
+    }
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    if (dos == null) {
+      dos = new DataOutputStream();
+      cos = CodedOutputStream.newInstance(dos);
+    }
+    dos.out = out;
+    cos.writeMessageNoTag(message);
+    cos.flush();
+  }
+
+  private static class DataInputStream extends InputStream {
+    DataInput in;
+    @Override
+    public int read() throws IOException {
+      try {
+        return in.readUnsignedByte();
+      } catch (EOFException e) {
+        return -1;
+      }
+    }
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    if (din == null) {
+      din = new DataInputStream();
+      cin = CodedInputStream.newInstance(din);
+      cin.setSizeLimit(Integer.MAX_VALUE);
+    }
+    din.in = in;
+    message = cin.readMessage(parser, ExtensionRegistry.getEmptyRegistry());
+    cin.resetSizeCounter();
+  }
+}
diff --git a/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageWriter.java b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageWriter.java
new file mode 100644
index 0000000000..5b7591b496
--- /dev/null
+++ b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageWriter.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
diff --git a/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageWriter.java b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageWriter.java
new file mode 100644
index 0000000000..5b7591b496
--- /dev/null
+++ b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/ProtoMessageWriter.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.history.logging.proto;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.io.SequenceFile.Writer;
+
+import com.google.protobuf.MessageLite;
+import com.google.protobuf.Parser;
+import org.apache.tez.common.StreamHelper;
+
+public class ProtoMessageWriter<T extends MessageLite> implements Closeable {
+  private final Path filePath;
+  private final Writer writer;
+  private final ProtoMessageWritable<T> writable;
+
+  ProtoMessageWriter(Configuration conf, Path filePath, Parser<T> parser) throws IOException {
+    this.filePath = filePath;
+    this.writer = SequenceFile.createWriter(
+        conf,
+        Writer.file(filePath),
+        Writer.keyClass(NullWritable.class),
+        Writer.valueClass(ProtoMessageWritable.class),
+        Writer.compression(CompressionType.RECORD));
+    this.writable = new ProtoMessageWritable<>(parser);
+  }
+
+  public Path getPath() {
+    return filePath;
+  }
+
+  public long getOffset() throws IOException {
+    return writer.getLength();
+  }
+
+  public void writeProto(T message) throws IOException {
+    writable.setMessage(message);
+    writer.append(NullWritable.get(), writable);
+  }
+
+  public void hflush() throws IOException {
+    StreamHelper.hflushIfSupported(writer);
+  }
+
+  @Override
+  public void close() throws IOException {
+    writer.close();
+  }
+}
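Reviewer sketch (not part of the patch): the offset-indexing pattern the writer enables — capture `getOffset()` around an append so a manifest can point readers straight at the record. Whether the history service samples the offset before or after the append is not visible in this hunk, so treat the ordering here as illustrative.

```java
package org.apache.tez.dag.history.logging.proto;

import java.io.IOException;

import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.HistoryEventProto;

class WriteEventsSketch {
  // Write an event and remember where it starts, so a manifest entry can
  // later seek a ProtoMessageReader directly to it.
  static long writeAndIndex(ProtoMessageWriter<HistoryEventProto> writer,
      HistoryEventProto event) throws IOException {
    long offset = writer.getOffset(); // position before the append
    writer.writeProto(event);
    writer.hflush(); // make the record visible to readers, where the FS supports it
    return offset;
  }
}
```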
diff --git a/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/TezProtoLoggers.java b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/TezProtoLoggers.java
new file mode 100644
index 0000000000..44390fc063
--- /dev/null
+++ b/tez-plugins/tez-protobuf-history-plugin/src/main/java/org/apache/tez/dag/history/logging/proto/TezProtoLoggers.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.history.logging.proto;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.HistoryEventProto;
+import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.ManifestEntryProto;
+
+/**
+ * Helper class that creates the proto loggers for Tez. The same class is used to read the
+ * events outside of Tez, so the configuration keys and directory layout are isolated here.
+ */
+public class TezProtoLoggers {
+  private DatePartitionedLogger<HistoryEventProto> appEventsLogger;
+  private DatePartitionedLogger<HistoryEventProto> dagEventsLogger;
+  private DatePartitionedLogger<ManifestEntryProto> manifestEventsLogger;
+
+  public boolean setup(Configuration conf, Clock clock) throws IOException {
+    String logDir = conf.get(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_BASE_DIR);
+    if (logDir == null) {
+      return false;
+    }
+    appEventsLogger = new DatePartitionedLogger<>(HistoryEventProto.PARSER,
+        new Path(logDir, "app_data"), conf, clock);
+    dagEventsLogger = new DatePartitionedLogger<>(HistoryEventProto.PARSER,
+        new Path(logDir, "dag_data"), conf, clock);
+    manifestEventsLogger = new DatePartitionedLogger<>(ManifestEntryProto.PARSER,
+        new Path(logDir, "dag_meta"), conf, clock);
+    return true;
+  }
+
+  public DatePartitionedLogger<HistoryEventProto> getAppEventsLogger() {
+    return appEventsLogger;
+  }
+
+  public DatePartitionedLogger<HistoryEventProto> getDagEventsLogger() {
+    return dagEventsLogger;
+  }
+
+  public DatePartitionedLogger<ManifestEntryProto> getManifestEventsLogger() {
+    return manifestEventsLogger;
+  }
+}
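Reviewer sketch (not part of the patch): minimal setup of the three loggers from configuration. The base dir value is illustrative; `SystemClock` is the Hadoop YARN utility the tests in this patch also use.

```java
package org.apache.tez.dag.history.logging.proto;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.util.SystemClock;
import org.apache.tez.dag.api.TezConfiguration;

class LoggerSetupSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // setup() returns false (and creates nothing) when the base dir is unset.
    conf.set(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_BASE_DIR, "/tmp/tez-history"); // illustrative
    TezProtoLoggers loggers = new TezProtoLoggers();
    if (!loggers.setup(conf, SystemClock.getInstance())) {
      throw new IllegalStateException("proto history logging not configured");
    }
    // app_data, dag_data and dag_meta now each have a DatePartitionedLogger.
  }
}
```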
diff --git a/tez-plugins/tez-protobuf-history-plugin/src/main/proto/HistoryLogger.proto b/tez-plugins/tez-protobuf-history-plugin/src/main/proto/HistoryLogger.proto
new file mode 100644
index 0000000000..a5bbe3492e
--- /dev/null
+++ b/tez-plugins/tez-protobuf-history-plugin/src/main/proto/HistoryLogger.proto
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option java_package = "org.apache.tez.dag.history.logging.proto";
+option java_outer_classname = "HistoryLoggerProtos";
+
+message KVPair {
+  optional string key = 1;
+  optional string value = 2;
+}
+
+message HistoryEventProto {
+  optional string event_type = 1;
+  optional int64 event_time = 2;
+  optional string user = 3;
+  optional string app_id = 4;
+  optional string app_attempt_id = 5;
+  optional string dag_id = 6;
+  optional string vertex_id = 7;
+  optional string task_id = 8;
+  optional string task_attempt_id = 9;
+  repeated KVPair event_data = 10;
+}
+
+message ManifestEntryProto {
+  optional string dag_id = 1;
+  optional string app_id = 2;
+  optional int64 dag_submitted_event_offset = 3;
+  optional int64 dag_finished_event_offset = 4;
+  optional string dag_file_path = 5;
+  optional int64 writeTime = 6;
+  optional string app_file_path = 7;
+  optional int64 app_launched_event_offset = 8;
+}
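Reviewer sketch (not part of the patch): how an event maps onto the schema above. Every field is optional; an event carries only the IDs that apply to it, and anything non-structural is flattened into the repeated `event_data` pairs. All values below are illustrative.

```java
import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.HistoryEventProto;
import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.KVPair;

class BuildEventSketch {
  static HistoryEventProto example() {
    return HistoryEventProto.newBuilder()
        .setEventType("DAG_STARTED")            // a HistoryEventType name, as a string
        .setEventTime(System.currentTimeMillis())
        .setUser("alice")                       // illustrative
        .setDagId("dag_1472000000000_0001_1")   // illustrative
        .addEventData(KVPair.newBuilder().setKey("status").setValue("RUNNING"))
        .build();
  }
}
```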
diff --git a/tez-plugins/tez-protobuf-history-plugin/src/test/java/org/apache/tez/dag/history/logging/proto/TestDagManifestFileScanner.java b/tez-plugins/tez-protobuf-history-plugin/src/test/java/org/apache/tez/dag/history/logging/proto/TestDagManifestFileScanner.java
new file mode 100644
index 0000000000..4950522429
--- /dev/null
+++ b/tez-plugins/tez-protobuf-history-plugin/src/test/java/org/apache/tez/dag/history/logging/proto/TestDagManifestFileScanner.java
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.dag.history.logging.proto;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.ManifestEntryProto;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class TestDagManifestFileScanner {
+  private MockClock clock;
+  private DatePartitionedLogger<ManifestEntryProto> manifestLogger;
+
+  @Rule
+  public TemporaryFolder tempFolder = new TemporaryFolder();
+
+  @Before
+  public void setupTest() throws Exception {
+    String basePath = tempFolder.newFolder().getAbsolutePath();
+    clock = new MockClock();
+    Configuration conf = new Configuration(false);
+    conf.set(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_BASE_DIR, basePath);
+    // LocalFileSystem does not implement truncate.
+    conf.set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem");
+    TezProtoLoggers loggers = new TezProtoLoggers();
+    loggers.setup(conf, clock);
+    manifestLogger = loggers.getManifestEventsLogger();
+  }
+
+  @Test(timeout=5000)
+  public void testNormal() throws Exception {
+    clock.setTime(0); // 0th day.
+    createManifestEvents(0, 8);
+    clock.setTime((24 * 60 * 60 + 1) * 1000); // 1 day 1 sec.
+    createManifestEvents(24 * 3600, 5);
+    DagManifesFileScanner scanner = new DagManifesFileScanner(manifestLogger);
+    int count = 0;
+    while (scanner.getNext() != null) {
+      ++count;
+    }
+    Assert.assertEquals(8, count);
+
+    // Save offset for later use.
+    String offset = scanner.getOffset();
+
+    // Move time outside the scan window; the scanner should now return the next day's events.
+    clock.setTime((24 * 60 * 60 + 61) * 1000); // 1 day 61 sec.
+    count = 0;
+    while (scanner.getNext() != null) {
+      ++count;
+    }
+    Assert.assertEquals(5, count);
+
+    // Reset the offset.
+    scanner.setOffset(offset);
+    count = 0;
+    while (scanner.getNext() != null) {
+      ++count;
+    }
+    Assert.assertEquals(5, count);
+
+    scanner.close();
+
+    // Cannot test append here, since LocalFileSystem does not implement append.
+  }
+
+  private Path deleteFilePath = null;
+
+  @Test(timeout=5000)
+  public void testError() throws Exception {
+    clock.setTime(0); // 0th day.
+    createManifestEvents(0, 4);
+    corruptFiles();
+    clock.setTime((24 * 60 * 60 + 1) * 1000); // 1 day 1 sec.
+    createManifestEvents(24 * 3600, 1);
+
+    DagManifesFileScanner scanner = new DagManifesFileScanner(manifestLogger);
+    Assert.assertNotNull(scanner.getNext());
+    deleteFilePath.getFileSystem(manifestLogger.getConfig()).delete(deleteFilePath, false);
+    // 4 files - 1 file deleted - 1 truncated - 1 corrupted => 1 remains.
+    Assert.assertNull(scanner.getNext());
+
+    // Save offset for later use.
+    String offset = scanner.getOffset();
+
+    // Move time outside the window; the scanner should skip the files with errors and give
+    // more data for the next day.
+    clock.setTime((24 * 60 * 60 + 61) * 1000); // 1 day 61 sec.
+    Assert.assertNotNull(scanner.getNext());
+    Assert.assertNull(scanner.getNext());
+
+    // Reset the offset.
+    scanner.setOffset(offset);
+    Assert.assertNotNull(scanner.getNext());
+    Assert.assertNull(scanner.getNext());
+    scanner.close();
+  }
+
+  private void createManifestEvents(long time, int numEvents) throws IOException {
+    for (int i = 0; i < numEvents; ++i) {
+      ApplicationId appId = ApplicationId.newInstance(1000L, i);
+      ManifestEntryProto proto = ManifestEntryProto.newBuilder()
+          .setAppId(appId.toString())
+          .setDagFilePath("dummy_dag_path_" + i)
+          .setDagSubmittedEventOffset(0)
+          .setDagFinishedEventOffset(1)
+          .setAppFilePath("dummy_app_path_" + i)
+          .setAppLaunchedEventOffset(2)
+          .setWriteTime(clock.getTime())
+          .build();
+      ProtoMessageWriter<ManifestEntryProto> writer = manifestLogger.getWriter(appId.toString());
+      writer.writeProto(proto);
+      writer.close();
+    }
+  }
+
+  private void corruptFiles() throws IOException {
+    int op = 0;
+    Configuration conf = manifestLogger.getConfig();
+    Path base = new Path(
+        conf.get(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_BASE_DIR) + "/dag_meta");
+    FileSystem fs = base.getFileSystem(conf);
+    for (FileStatus status : fs.listStatus(base)) {
+      if (status.isDirectory()) {
+        for (FileStatus file : fs.listStatus(status.getPath())) {
+          if (!file.getPath().getName().startsWith("application_")) {
+            continue;
+          }
+          switch (op) {
+          case 0:
+          case 1:
+            // op 0 corrupts the file tail; op 1 truncates the file to zero length.
+            fs.truncate(file.getPath(), op == 1 ? 0 : file.getLen() - 20);
+            break;
+          case 3:
+            // Mark for deletion by the test; the file for op 2 is left intact.
+            deleteFilePath = file.getPath();
+            break;
+          }
+          op++;
+        }
+      }
+    }
+  }
+
+  private static class MockClock implements Clock {
+    private long time = 0;
+
+    void setTime(long time) {
+      this.time = time;
+    }
+
+    @Override
+    public long getTime() {
+      return time;
+    }
+  }
+}
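Reviewer sketch (not part of the patch): an external consumer draining the manifest and checkpointing its position, assuming the `DagManifesFileScanner` signatures exercised by the tests above (`getNext`/`getOffset`/`setOffset`/`close`; the scanner class itself lives elsewhere in this patch).

```java
package org.apache.tez.dag.history.logging.proto;

import java.io.IOException;

import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.ManifestEntryProto;

class ManifestCheckpointSketch {
  // Drain available manifest entries, returning an opaque offset string that a
  // later run can feed to setOffset() to resume where this one stopped.
  static String drain(DagManifesFileScanner scanner) throws IOException {
    ManifestEntryProto entry;
    while ((entry = scanner.getNext()) != null) {
      System.out.println("dag " + entry.getDagId() + " -> " + entry.getDagFilePath());
    }
    return scanner.getOffset(); // persist somewhere durable
  }
}
```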
diff --git a/tez-plugins/tez-protobuf-history-plugin/src/test/java/org/apache/tez/dag/history/logging/proto/TestHistoryEventProtoConverter.java b/tez-plugins/tez-protobuf-history-plugin/src/test/java/org/apache/tez/dag/history/logging/proto/TestHistoryEventProtoConverter.java
new file mode 100644
index 0000000000..64f66bc12b
--- /dev/null
+++ b/tez-plugins/tez-protobuf-history-plugin/src/test/java/org/apache/tez/dag/history/logging/proto/TestHistoryEventProtoConverter.java
@@ -0,0 +1,716 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.history.logging.proto;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.tez.common.ATSConstants;
+import org.apache.tez.common.VersionInfo;
+import org.apache.tez.common.counters.TezCounters;
+import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
+import org.apache.tez.dag.api.EdgeProperty;
+import org.apache.tez.dag.api.EdgeProperty.DataSourceType;
+import org.apache.tez.dag.api.EdgeProperty.SchedulingType;
+import org.apache.tez.dag.api.InputDescriptor;
+import org.apache.tez.dag.api.OutputDescriptor;
+import org.apache.tez.dag.api.oldrecords.TaskAttemptState;
+import org.apache.tez.dag.api.oldrecords.TaskState;
+import org.apache.tez.dag.api.records.DAGProtos.CallerContextProto;
+import org.apache.tez.dag.api.records.DAGProtos.DAGPlan;
+import org.apache.tez.dag.app.dag.DAGState;
+import org.apache.tez.dag.app.dag.VertexState;
+import org.apache.tez.dag.app.dag.impl.ServicePluginInfo;
+import org.apache.tez.dag.app.dag.impl.TaskAttemptImpl.DataEventDependencyInfo;
+import org.apache.tez.dag.app.dag.impl.VertexStats;
+import org.apache.tez.dag.app.web.AMWebController;
+import org.apache.tez.dag.history.HistoryEvent;
+import org.apache.tez.dag.history.HistoryEventType;
+import org.apache.tez.dag.history.events.AMLaunchedEvent;
+import org.apache.tez.dag.history.events.AMStartedEvent;
+import org.apache.tez.dag.history.events.AppLaunchedEvent;
+import org.apache.tez.dag.history.events.ContainerLaunchedEvent;
+import org.apache.tez.dag.history.events.ContainerStoppedEvent;
+import org.apache.tez.dag.history.events.DAGCommitStartedEvent;
+import org.apache.tez.dag.history.events.DAGFinishedEvent;
+import org.apache.tez.dag.history.events.DAGInitializedEvent;
+import org.apache.tez.dag.history.events.DAGKillRequestEvent;
+import org.apache.tez.dag.history.events.DAGRecoveredEvent;
+import org.apache.tez.dag.history.events.DAGStartedEvent;
+import org.apache.tez.dag.history.events.DAGSubmittedEvent;
+import org.apache.tez.dag.history.events.TaskAttemptFinishedEvent;
+import org.apache.tez.dag.history.events.TaskAttemptStartedEvent;
+import org.apache.tez.dag.history.events.TaskFinishedEvent;
+import org.apache.tez.dag.history.events.TaskStartedEvent;
+import org.apache.tez.dag.history.events.VertexCommitStartedEvent;
+import org.apache.tez.dag.history.events.VertexConfigurationDoneEvent;
+import org.apache.tez.dag.history.events.VertexFinishedEvent;
+import org.apache.tez.dag.history.events.VertexGroupCommitFinishedEvent;
+import org.apache.tez.dag.history.events.VertexGroupCommitStartedEvent;
+import org.apache.tez.dag.history.events.VertexInitializedEvent;
+import org.apache.tez.dag.history.events.VertexStartedEvent;
+import org.apache.tez.dag.history.logging.EntityTypes;
+import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.HistoryEventProto;
+import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.KVPair;
+import org.apache.tez.dag.records.TaskAttemptTerminationCause;
+import org.apache.tez.dag.records.TezDAGID;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.dag.records.TezTaskID;
+import org.apache.tez.dag.records.TezVertexID;
+import org.apache.tez.runtime.api.TaskFailureType;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class TestHistoryEventProtoConverter {
+  private ApplicationAttemptId applicationAttemptId;
+  private ApplicationId applicationId;
+  private String user = "user";
+  private Random random = new Random();
+  private TezDAGID tezDAGID;
+  private TezVertexID tezVertexID;
+  private TezTaskID tezTaskID;
+  private TezTaskAttemptID tezTaskAttemptID;
+  private DAGPlan dagPlan;
+  private ContainerId containerId;
+  private NodeId nodeId;
+  private String containerLogs = "containerLogs";
+  private HistoryEventProtoConverter converter = new HistoryEventProtoConverter();
+
+  @Before
+  public void setup() {
+    applicationId = ApplicationId.newInstance(9999L, 1);
+    applicationAttemptId = ApplicationAttemptId.newInstance(applicationId, 1);
+    tezDAGID = TezDAGID.getInstance(applicationId, random.nextInt());
+    tezVertexID = TezVertexID.getInstance(tezDAGID, random.nextInt());
+    tezTaskID = TezTaskID.getInstance(tezVertexID, random.nextInt());
+    tezTaskAttemptID = TezTaskAttemptID.getInstance(tezTaskID, random.nextInt());
+    CallerContextProto.Builder callerContextProto = CallerContextProto.newBuilder();
+    callerContextProto.setContext("ctxt");
+    callerContextProto.setCallerId("Caller_ID");
+    callerContextProto.setCallerType("Caller_Type");
+    callerContextProto.setBlob("Desc_1");
+    dagPlan = DAGPlan.newBuilder().setName("DAGPlanMock")
+        .setCallerContext(callerContextProto).build();
+    containerId = ContainerId.newContainerId(applicationAttemptId, 111);
+    nodeId = NodeId.newInstance("node", 13435);
+  }
+
+  @Test(timeout = 5000)
+  public void testHandlerExists() {
+    for (HistoryEventType eventType : HistoryEventType.values()) {
+      HistoryEvent event = null;
+      switch (eventType) {
+      case APP_LAUNCHED:
+        event = new AppLaunchedEvent(applicationId, random.nextInt(), random.nextInt(),
+            user, new Configuration(false), null);
+        break;
+      case AM_LAUNCHED:
+        event = new AMLaunchedEvent(applicationAttemptId, random.nextInt(), random.nextInt(),
+            user);
+        break;
+      case AM_STARTED:
+        event = new AMStartedEvent(applicationAttemptId, random.nextInt(), user);
+        break;
+      case DAG_SUBMITTED:
+        event = new DAGSubmittedEvent(tezDAGID, random.nextInt(), dagPlan, applicationAttemptId,
+            null, user, null, containerLogs, null);
+        break;
+      case DAG_INITIALIZED:
+        event = new DAGInitializedEvent(tezDAGID, random.nextInt(), user, dagPlan.getName(), null);
+        break;
+      case DAG_STARTED:
+        event = new DAGStartedEvent(tezDAGID, random.nextInt(), user, dagPlan.getName());
+        break;
+      case DAG_FINISHED:
+        event = new DAGFinishedEvent(tezDAGID, random.nextInt(), random.nextInt(), DAGState.ERROR,
+            null, null, user, dagPlan.getName(), null, applicationAttemptId, dagPlan);
+        break;
+      case VERTEX_INITIALIZED:
+        event = new VertexInitializedEvent(tezVertexID, "v1", random.nextInt(), random.nextInt(),
+            random.nextInt(), "proc", null, null, null);
+        break;
+      case VERTEX_STARTED:
+        event = new VertexStartedEvent(tezVertexID, random.nextInt(), random.nextInt());
+        break;
+      case VERTEX_CONFIGURE_DONE:
+        event = new VertexConfigurationDoneEvent(tezVertexID, 0L, 1, null, null, null, true);
+        break;
+      case VERTEX_FINISHED:
+        event = new VertexFinishedEvent(tezVertexID, "v1", 1, random.nextInt(), random.nextInt(),
+            random.nextInt(), random.nextInt(), random.nextInt(), VertexState.ERROR,
+            null, null, null, null, null);
+        break;
+      case TASK_STARTED:
+        event = new TaskStartedEvent(tezTaskID, "v1", random.nextInt(), random.nextInt());
+        break;
+      case TASK_FINISHED:
+        event = new TaskFinishedEvent(tezTaskID, "v1", random.nextInt(), random.nextInt(),
+            tezTaskAttemptID, TaskState.FAILED, null, null, 0);
+        break;
+      case TASK_ATTEMPT_STARTED:
+        event = new TaskAttemptStartedEvent(tezTaskAttemptID, "v1", random.nextInt(), containerId,
+            nodeId, null, null, "nodeHttpAddress");
+        break;
+      case TASK_ATTEMPT_FINISHED:
+        event = new TaskAttemptFinishedEvent(tezTaskAttemptID, "v1", random.nextInt(),
+            random.nextInt(), TaskAttemptState.FAILED, TaskFailureType.NON_FATAL,
+            TaskAttemptTerminationCause.OUTPUT_LOST,
+            null, null, null, null, 0, null, 0,
+            containerId, nodeId, null, null, "nodeHttpAddress");
+        break;
+      case CONTAINER_LAUNCHED:
+        event = new ContainerLaunchedEvent(containerId, random.nextInt(),
+            applicationAttemptId);
+        break;
+      case CONTAINER_STOPPED:
+        event = new ContainerStoppedEvent(containerId, random.nextInt(), -1, applicationAttemptId);
+        break;
+      case DAG_COMMIT_STARTED:
+        event = new DAGCommitStartedEvent();
+        break;
+      case VERTEX_COMMIT_STARTED:
+        event = new VertexCommitStartedEvent();
+        break;
+      case VERTEX_GROUP_COMMIT_STARTED:
+        event = new VertexGroupCommitStartedEvent();
+        break;
+      case VERTEX_GROUP_COMMIT_FINISHED:
+        event = new VertexGroupCommitFinishedEvent();
+        break;
+      case DAG_RECOVERED:
+        event = new DAGRecoveredEvent(applicationAttemptId, tezDAGID, dagPlan.getName(),
+            user, random.nextLong(), containerLogs);
+        break;
+      case DAG_KILL_REQUEST:
+        event = new DAGKillRequestEvent();
+        break;
+      default:
+        Assert.fail("Unhandled event type " + eventType);
+      }
+      if (event == null || !event.isHistoryEvent()) {
+        continue;
+      }
+      converter.convert(event);
+    }
+  }
+
+  static class MockVersionInfo extends VersionInfo {
+    MockVersionInfo() {
+      super("component", "1.1.0", "rev1", "20120101", "git.apache.org");
+    }
+  }
+
+  private String findEventData(HistoryEventProto proto, String key) {
+    for (KVPair data : proto.getEventDataList()) {
+      if (data.getKey().equals(key)) {
+        return data.getValue();
+      }
+    }
+    return null;
+  }
+
+  private void assertEventData(HistoryEventProto proto, String key, String value) {
+    String evtVal = findEventData(proto, key);
+    if (evtVal == null) {
+      Assert.fail("Cannot find kv pair: " + key);
+    }
+    if (value != null) {
+      Assert.assertEquals(value, evtVal);
+    }
+  }
+
+  private void assertNoEventData(HistoryEventProto proto, String key) {
+    for (KVPair data : proto.getEventDataList()) {
+      if (data.getKey().equals(key)) {
+        Assert.fail("Found unexpected kv pair: " + key);
+      }
+    }
+  }
+
+  private String safeToString(Object obj) {
+    return obj == null ? "" : obj.toString();
+  }
+
+  private void assertCommon(HistoryEventProto proto, HistoryEventType type, long eventTime,
+      EntityTypes entityType, ApplicationAttemptId appAttemptId, String user, int numData) {
+    Assert.assertEquals(type.name(), proto.getEventType());
+    Assert.assertEquals(eventTime, proto.getEventTime());
+    // Assert.assertEquals(safeToString(appId), proto.getAppId());
+    Assert.assertEquals(safeToString(appAttemptId), proto.getAppAttemptId());
+    Assert.assertEquals(safeToString(user), proto.getUser());
+    if (entityType != null) {
+      switch (entityType) { // Intentional fallthrough.
+      case TEZ_TASK_ATTEMPT_ID:
+        Assert.assertEquals(tezTaskAttemptID.toString(), proto.getTaskAttemptId());
+      case TEZ_TASK_ID:
+        Assert.assertEquals(tezTaskID.toString(), proto.getTaskId());
+      case TEZ_VERTEX_ID:
+        Assert.assertEquals(tezVertexID.toString(), proto.getVertexId());
+      case TEZ_DAG_ID:
+        Assert.assertEquals(tezDAGID.toString(), proto.getDagId());
+      case TEZ_APPLICATION:
+        Assert.assertEquals(applicationId.toString(), proto.getAppId());
+        break;
+      default:
+        Assert.fail("Invalid type: " + entityType.name());
+      }
+    }
+    Assert.assertEquals(numData, proto.getEventDataCount());
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertAppLaunchedEvent() {
+    long launchTime = random.nextLong();
+    long submitTime = random.nextLong();
+    Configuration conf = new Configuration(false);
+    conf.set("foo", "bar");
+    conf.set("applicationId", "1234");
+
+    MockVersionInfo mockVersionInfo = new MockVersionInfo();
+    AppLaunchedEvent event = new AppLaunchedEvent(applicationId, launchTime, submitTime, user,
+        conf, mockVersionInfo);
+    HistoryEventProto proto = converter.convert(event);
+
+    assertCommon(proto, HistoryEventType.APP_LAUNCHED, launchTime, EntityTypes.TEZ_APPLICATION,
+        null, user, 3);
+    assertEventData(proto, ATSConstants.CONFIG, null);
+    assertEventData(proto, ATSConstants.TEZ_VERSION, null);
+    assertEventData(proto, ATSConstants.DAG_AM_WEB_SERVICE_VERSION, AMWebController.VERSION);
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertAMLaunchedEvent() {
+    long launchTime = random.nextLong();
+    long submitTime = random.nextLong();
+    AMLaunchedEvent event = new AMLaunchedEvent(applicationAttemptId, launchTime, submitTime,
+        user);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.AM_LAUNCHED, launchTime, EntityTypes.TEZ_APPLICATION,
+        applicationAttemptId, user, 1);
+    assertEventData(proto, ATSConstants.APP_SUBMIT_TIME, String.valueOf(submitTime));
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertAMStartedEvent() {
+    long startTime = random.nextLong();
+    AMStartedEvent event = new AMStartedEvent(applicationAttemptId, startTime, user);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.AM_STARTED, startTime, EntityTypes.TEZ_APPLICATION,
+        applicationAttemptId, user, 0);
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertContainerLaunchedEvent() {
+    long launchTime = random.nextLong();
+    ContainerLaunchedEvent event = new ContainerLaunchedEvent(containerId, launchTime,
+        applicationAttemptId);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.CONTAINER_LAUNCHED, launchTime,
+        EntityTypes.TEZ_APPLICATION, applicationAttemptId, null, 1);
+    assertEventData(proto, ATSConstants.CONTAINER_ID, containerId.toString());
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertContainerStoppedEvent() {
+    long stopTime = random.nextLong();
+    int exitStatus = random.nextInt();
+    ContainerStoppedEvent event = new ContainerStoppedEvent(containerId, stopTime, exitStatus,
+        applicationAttemptId);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.CONTAINER_STOPPED, stopTime, EntityTypes.TEZ_APPLICATION,
+        applicationAttemptId, null, 3);
+    assertEventData(proto, ATSConstants.CONTAINER_ID, containerId.toString());
+    assertEventData(proto, ATSConstants.EXIT_STATUS, String.valueOf(exitStatus));
+    assertEventData(proto, ATSConstants.FINISH_TIME, String.valueOf(stopTime));
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertDAGStartedEvent() {
+    long startTime = random.nextLong();
+    String dagName = "testDagName";
+    DAGStartedEvent event = new DAGStartedEvent(tezDAGID, startTime, user, dagName);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.DAG_STARTED, startTime, EntityTypes.TEZ_DAG_ID, null,
+        user, 2);
+    assertEventData(proto, ATSConstants.DAG_NAME, dagName);
+    assertEventData(proto, ATSConstants.STATUS, DAGState.RUNNING.name());
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertDAGSubmittedEvent() {
+    long submitTime = random.nextLong();
+
+    final String queueName = "TEST_DAG_SUBMITTED";
+    DAGSubmittedEvent event = new DAGSubmittedEvent(tezDAGID, submitTime, dagPlan,
+        applicationAttemptId, null, user, null, containerLogs, queueName);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.DAG_SUBMITTED, submitTime, EntityTypes.TEZ_DAG_ID,
+        applicationAttemptId, user, 8);
+
+    assertEventData(proto, ATSConstants.DAG_NAME, dagPlan.getName());
+    assertEventData(proto, ATSConstants.DAG_QUEUE_NAME, event.getQueueName());
+    assertEventData(proto, ATSConstants.DAG_AM_WEB_SERVICE_VERSION, AMWebController.VERSION);
+    assertEventData(proto, ATSConstants.IN_PROGRESS_LOGS_URL + "_" +
+        applicationAttemptId.getAttemptId(), containerLogs);
+    assertEventData(proto, ATSConstants.CALLER_CONTEXT_ID,
+        dagPlan.getCallerContext().getCallerId());
+    assertEventData(proto, ATSConstants.CALLER_CONTEXT_TYPE,
+        dagPlan.getCallerContext().getCallerType());
+    assertEventData(proto, ATSConstants.CALLER_CONTEXT, dagPlan.getCallerContext().getContext());
+    assertEventData(proto, ATSConstants.DAG_PLAN, null);
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertTaskAttemptFinishedEvent() {
+    String vertexName = "testVertex";
+    long creationTime = random.nextLong();
+    long startTime = creationTime + 1000;
+    long allocationTime = creationTime + 1001;
+    long finishTime = startTime + 1002;
+    TaskAttemptState state = TaskAttemptState
+        .values()[random.nextInt(TaskAttemptState.values().length)];
+    TaskAttemptTerminationCause error = TaskAttemptTerminationCause
+        .values()[random.nextInt(TaskAttemptTerminationCause.values().length)];
+    String diagnostics = "random diagnostics message";
+    TezCounters counters = new TezCounters();
+    long lastDataEventTime = finishTime - 1;
+    List<DataEventDependencyInfo> events = Lists.newArrayList();
+    events.add(new DataEventDependencyInfo(lastDataEventTime, tezTaskAttemptID));
+    events.add(new DataEventDependencyInfo(lastDataEventTime, tezTaskAttemptID));
+
+    TaskAttemptFinishedEvent event = new TaskAttemptFinishedEvent(tezTaskAttemptID, vertexName,
+        startTime, finishTime, state, TaskFailureType.FATAL, error, diagnostics, counters, events,
+        null, creationTime, tezTaskAttemptID, allocationTime, containerId, nodeId, "inProgressURL",
+        "logsURL", "nodeHttpAddress");
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.TASK_ATTEMPT_FINISHED, finishTime,
+        EntityTypes.TEZ_DAG_ID, null, null, 17);
+
+    assertEventData(proto, ATSConstants.STATUS, state.name());
+    assertEventData(proto, ATSConstants.CREATION_CAUSAL_ATTEMPT, tezTaskAttemptID.toString());
+    assertEventData(proto, ATSConstants.CREATION_TIME, String.valueOf(creationTime));
+    assertEventData(proto, ATSConstants.ALLOCATION_TIME, String.valueOf(allocationTime));
+    assertEventData(proto, ATSConstants.START_TIME, String.valueOf(startTime));
+    assertEventData(proto, ATSConstants.TIME_TAKEN, String.valueOf(finishTime - startTime));
+    assertEventData(proto, ATSConstants.TASK_FAILURE_TYPE, TaskFailureType.FATAL.name());
+    assertEventData(proto, ATSConstants.TASK_ATTEMPT_ERROR_ENUM, error.name());
+    assertEventData(proto, ATSConstants.DIAGNOSTICS, diagnostics);
+    assertEventData(proto, ATSConstants.LAST_DATA_EVENTS, null);
+    assertEventData(proto, ATSConstants.COUNTERS, null);
+    assertEventData(proto, ATSConstants.IN_PROGRESS_LOGS_URL, "inProgressURL");
+    assertEventData(proto, ATSConstants.COMPLETED_LOGS_URL, "logsURL");
+    assertEventData(proto, ATSConstants.NODE_ID, nodeId.toString());
+    assertEventData(proto, ATSConstants.CONTAINER_ID, containerId.toString());
+    assertEventData(proto, ATSConstants.NODE_HTTP_ADDRESS, "nodeHttpAddress");
+
+    TaskAttemptFinishedEvent eventWithNullFailureType =
+        new TaskAttemptFinishedEvent(tezTaskAttemptID, vertexName,
+            startTime, finishTime, state, null, error, diagnostics, counters, events, null,
+            creationTime,
+            tezTaskAttemptID, allocationTime, containerId, nodeId, "inProgressURL", "logsURL",
+            "nodeHttpAddress");
+    proto = converter.convert(eventWithNullFailureType);
+    assertNoEventData(proto, ATSConstants.TASK_FAILURE_TYPE);
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertDAGInitializedEvent() {
+    long initTime = random.nextLong();
+
+    Map<String, TezVertexID> nameIdMap = new HashMap<>();
+    nameIdMap.put("foo", tezVertexID);
+
+    DAGInitializedEvent event = new DAGInitializedEvent(tezDAGID, initTime, "user", "dagName",
+        nameIdMap);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.DAG_INITIALIZED, initTime,
+        EntityTypes.TEZ_DAG_ID, null, user, 2);
+    assertEventData(proto, ATSConstants.DAG_NAME, "dagName");
+    assertEventData(proto, ATSConstants.VERTEX_NAME_ID_MAPPING, null);
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertDAGFinishedEvent() {
+    long finishTime = random.nextLong();
+    long startTime = random.nextLong();
+    Map<String, Integer> taskStats = new HashMap<>();
+    taskStats.put("FOO", 100);
+    taskStats.put("BAR", 200);
+
+    DAGFinishedEvent event = new DAGFinishedEvent(tezDAGID, startTime, finishTime, DAGState.ERROR,
+        "diagnostics", null, user, dagPlan.getName(), taskStats, applicationAttemptId, dagPlan);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.DAG_FINISHED, finishTime,
+        EntityTypes.TEZ_DAG_ID, applicationAttemptId, user, 11);
+
+    assertEventData(proto, ATSConstants.DAG_NAME, dagPlan.getName());
+    assertEventData(proto, ATSConstants.STATUS, DAGState.ERROR.name());
+    assertEventData(proto, ATSConstants.CALLER_CONTEXT_ID,
+        dagPlan.getCallerContext().getCallerId());
+    assertEventData(proto, ATSConstants.CALLER_CONTEXT_TYPE,
+        dagPlan.getCallerContext().getCallerType());
+    assertEventData(proto, ATSConstants.START_TIME, String.valueOf(startTime));
+    assertEventData(proto, ATSConstants.TIME_TAKEN, String.valueOf(finishTime - startTime));
+    assertEventData(proto, ATSConstants.DIAGNOSTICS, "diagnostics");
+    assertEventData(proto, ATSConstants.COMPLETION_APPLICATION_ATTEMPT_ID,
+        applicationAttemptId.toString());
+    assertEventData(proto, "FOO", String.valueOf(100));
+    assertEventData(proto, "BAR", String.valueOf(200));
+    assertEventData(proto, ATSConstants.COUNTERS, null);
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertVertexInitializedEvent() {
+    long initRequestedTime = random.nextLong();
+    long initedTime = random.nextLong();
+    int numTasks = random.nextInt();
+    VertexInitializedEvent event = new VertexInitializedEvent(tezVertexID, "v1", initRequestedTime,
+        initedTime, numTasks, "proc", null, null,
+        new ServicePluginInfo().setContainerLauncherName("abc")
+            .setTaskSchedulerName("def").setTaskCommunicatorName("ghi")
+            .setContainerLauncherClassName("abc1")
+            .setTaskSchedulerClassName("def1")
+            .setTaskCommunicatorClassName("ghi1"));
+
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.VERTEX_INITIALIZED, initedTime,
+        EntityTypes.TEZ_VERTEX_ID, null, null, 6);
+
+    assertEventData(proto, ATSConstants.VERTEX_NAME, "v1");
+    assertEventData(proto, ATSConstants.PROCESSOR_CLASS_NAME, "proc");
+    assertEventData(proto, ATSConstants.INIT_REQUESTED_TIME, String.valueOf(initRequestedTime));
+    assertEventData(proto, ATSConstants.NUM_TASKS, String.valueOf(numTasks));
+    assertEventData(proto, ATSConstants.SERVICE_PLUGIN, null);
+
+    /*
+    Assert.assertNotNull(timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN));
+    Assert.assertEquals("abc",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.CONTAINER_LAUNCHER_NAME));
+    Assert.assertEquals("def",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.TASK_SCHEDULER_NAME));
+    Assert.assertEquals("ghi",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.TASK_COMMUNICATOR_NAME));
+    Assert.assertEquals("abc1",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.CONTAINER_LAUNCHER_CLASS_NAME));
+    Assert.assertEquals("def1",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.TASK_SCHEDULER_CLASS_NAME));
+    Assert.assertEquals("ghi1",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.TASK_COMMUNICATOR_CLASS_NAME));
+    */
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertVertexStartedEvent() {
+    long startRequestedTime = random.nextLong();
+    long startTime = random.nextLong();
+
+    VertexStartedEvent event = new VertexStartedEvent(tezVertexID, startRequestedTime, startTime);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.VERTEX_STARTED, startTime,
+        EntityTypes.TEZ_VERTEX_ID, null, null, 2);
+    assertEventData(proto, ATSConstants.START_REQUESTED_TIME, String.valueOf(startRequestedTime));
+    assertEventData(proto, ATSConstants.STATUS, VertexState.RUNNING.name());
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertVertexFinishedEvent() {
+    String vertexName = "v1";
+    long initRequestedTime = random.nextLong();
+    long initedTime = random.nextLong();
+    long startRequestedTime = random.nextLong();
+    long startTime = random.nextLong();
+    long finishTime = random.nextLong();
+    Map<String, Integer> taskStats = new HashMap<>();
+    taskStats.put("FOO", 100);
+    taskStats.put("BAR", 200);
+    VertexStats vertexStats = new VertexStats();
+
+    VertexFinishedEvent event = new VertexFinishedEvent(tezVertexID, vertexName, 1,
+        initRequestedTime, initedTime, startRequestedTime, startTime, finishTime,
+        VertexState.ERROR, "diagnostics", null, vertexStats, taskStats,
+        new ServicePluginInfo().setContainerLauncherName("abc")
+            .setTaskSchedulerName("def").setTaskCommunicatorName("ghi")
+            .setContainerLauncherClassName("abc1")
+            .setTaskSchedulerClassName("def1")
+            .setTaskCommunicatorClassName("ghi1"));
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.VERTEX_FINISHED, finishTime,
+        EntityTypes.TEZ_VERTEX_ID, null, null, 9);
+
+    assertEventData(proto, ATSConstants.VERTEX_NAME, vertexName);
+    assertEventData(proto, ATSConstants.STATUS, VertexState.ERROR.name());
+
+    assertEventData(proto, ATSConstants.TIME_TAKEN, String.valueOf(finishTime - startTime));
+    assertEventData(proto, ATSConstants.DIAGNOSTICS, "diagnostics");
+    assertEventData(proto, ATSConstants.COUNTERS, null);
+    assertEventData(proto, ATSConstants.STATS, null);
+    assertEventData(proto, "FOO", "100");
+    assertEventData(proto, "BAR", "200");
+
+    assertEventData(proto, ATSConstants.SERVICE_PLUGIN, null);
+    /*
+    Assert.assertEquals("abc",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.CONTAINER_LAUNCHER_NAME));
+    Assert.assertEquals("def",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.TASK_SCHEDULER_NAME));
+    Assert.assertEquals("ghi",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.TASK_COMMUNICATOR_NAME));
+    Assert.assertEquals("abc1",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.CONTAINER_LAUNCHER_CLASS_NAME));
+    Assert.assertEquals("def1",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.TASK_SCHEDULER_CLASS_NAME));
+    Assert.assertEquals("ghi1",
+        ((Map)timelineEntity.getOtherInfo().get(ATSConstants.SERVICE_PLUGIN)).get(
+            ATSConstants.TASK_COMMUNICATOR_CLASS_NAME));
+    */
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertTaskStartedEvent() {
+    long scheduleTime = random.nextLong();
+    long startTime = random.nextLong();
+    TaskStartedEvent event = new TaskStartedEvent(tezTaskID, "v1", scheduleTime, startTime);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.TASK_STARTED, startTime,
+        EntityTypes.TEZ_TASK_ID, null, null, 2);
+
+    assertEventData(proto, ATSConstants.SCHEDULED_TIME, String.valueOf(scheduleTime));
+    assertEventData(proto, ATSConstants.STATUS, TaskState.SCHEDULED.name());
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertTaskAttemptStartedEvent() {
+    long startTime = random.nextLong();
+    TaskAttemptStartedEvent event = new TaskAttemptStartedEvent(tezTaskAttemptID, "v1",
+        startTime, containerId, nodeId, "inProgressURL", "logsURL", "nodeHttpAddress");
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.TASK_ATTEMPT_STARTED, startTime,
+        EntityTypes.TEZ_TASK_ATTEMPT_ID, null, null, 6);
+
+    assertEventData(proto, ATSConstants.STATUS, TaskAttemptState.RUNNING.name());
+    assertEventData(proto, ATSConstants.IN_PROGRESS_LOGS_URL, "inProgressURL");
+    assertEventData(proto, ATSConstants.COMPLETED_LOGS_URL, "logsURL");
+    assertEventData(proto, ATSConstants.NODE_ID, nodeId.toString());
+    assertEventData(proto, ATSConstants.CONTAINER_ID, containerId.toString());
+    assertEventData(proto, ATSConstants.NODE_HTTP_ADDRESS, "nodeHttpAddress");
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertTaskFinishedEvent() {
+    String vertexName = "testVertexName";
+    long startTime = random.nextLong();
+    long finishTime = random.nextLong();
+    TaskState state = TaskState.values()[random.nextInt(TaskState.values().length)];
+    String diagnostics = "diagnostics message";
+    TezCounters counters = new TezCounters();
+
+    TaskFinishedEvent event = new TaskFinishedEvent(tezTaskID, vertexName, startTime, finishTime,
+        tezTaskAttemptID, state, diagnostics, counters, 3);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.TASK_FINISHED, finishTime,
+        EntityTypes.TEZ_TASK_ID, null, null, 6);
+    assertEventData(proto, ATSConstants.STATUS, state.name());
+    assertEventData(proto, ATSConstants.TIME_TAKEN, String.valueOf(finishTime - startTime));
+    assertEventData(proto, ATSConstants.SUCCESSFUL_ATTEMPT_ID, tezTaskAttemptID.toString());
+    assertEventData(proto, ATSConstants.NUM_FAILED_TASKS_ATTEMPTS, "3");
+    assertEventData(proto, ATSConstants.DIAGNOSTICS, diagnostics);
+    assertEventData(proto, ATSConstants.COUNTERS, null);
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertVertexReconfigureDoneEvent() {
+    TezVertexID vId = tezVertexID;
+    Map<String, EdgeProperty> edgeMgrs =
+        new HashMap<String, EdgeProperty>();
+
+    edgeMgrs.put("a", EdgeProperty.create(EdgeManagerPluginDescriptor.create("a.class")
+        .setHistoryText("text"), DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
+        OutputDescriptor.create("Out"), InputDescriptor.create("In")));
+    VertexConfigurationDoneEvent event = new VertexConfigurationDoneEvent(vId, 0L, 1, null,
+        edgeMgrs, null, true);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.VERTEX_CONFIGURE_DONE, 0L,
+        EntityTypes.TEZ_VERTEX_ID, null, null, 2);
+    assertEventData(proto, ATSConstants.NUM_TASKS, "1");
+    assertEventData(proto, ATSConstants.UPDATED_EDGE_MANAGERS, null);
+
+    /*
+    Map updatedEdgeMgrs = (Map)
+        evt.getEventInfo().get(ATSConstants.UPDATED_EDGE_MANAGERS);
+    Assert.assertEquals(1, updatedEdgeMgrs.size());
+    Assert.assertTrue(updatedEdgeMgrs.containsKey("a"));
+    Map updatedEdgeMgr = (Map) updatedEdgeMgrs.get("a");
+
+    Assert.assertEquals(DataMovementType.CUSTOM.name(),
+        updatedEdgeMgr.get(DAGUtils.DATA_MOVEMENT_TYPE_KEY));
+    Assert.assertEquals("In", updatedEdgeMgr.get(DAGUtils.EDGE_DESTINATION_CLASS_KEY));
+    Assert.assertEquals("a.class", updatedEdgeMgr.get(DAGUtils.EDGE_MANAGER_CLASS_KEY));
+    */
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertDAGRecoveredEvent() {
+    long recoverTime = random.nextLong();
+    DAGRecoveredEvent event = new DAGRecoveredEvent(applicationAttemptId, tezDAGID,
+        dagPlan.getName(), user, recoverTime, containerLogs);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.DAG_RECOVERED, recoverTime,
+        EntityTypes.TEZ_DAG_ID, applicationAttemptId, user, 2);
+    assertEventData(proto, ATSConstants.IN_PROGRESS_LOGS_URL + "_" +
+        applicationAttemptId.getAttemptId(), containerLogs);
+    assertEventData(proto, ATSConstants.DAG_NAME, dagPlan.getName());
+  }
+
+  @Test(timeout = 5000)
+  public void testConvertDAGRecoveredEvent2() {
+    long recoverTime = random.nextLong();
+
+    DAGRecoveredEvent event = new DAGRecoveredEvent(applicationAttemptId, tezDAGID,
+        dagPlan.getName(), user, recoverTime, DAGState.ERROR, "mock reason", containerLogs);
+    HistoryEventProto proto = converter.convert(event);
+    assertCommon(proto, HistoryEventType.DAG_RECOVERED, recoverTime,
+        EntityTypes.TEZ_DAG_ID, applicationAttemptId, user, 4);
+    assertEventData(proto, ATSConstants.DAG_STATE, DAGState.ERROR.name());
+    assertEventData(proto, ATSConstants.RECOVERY_FAILURE_REASON, "mock reason");
+    assertEventData(proto, ATSConstants.IN_PROGRESS_LOGS_URL + "_" +
+        applicationAttemptId.getAttemptId(), containerLogs);
+    assertEventData(proto, ATSConstants.DAG_NAME, dagPlan.getName());
+  }
+}
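Reviewer sketch (not part of the patch): since the converter flattens everything non-structural into `event_data` string pairs, downstream consumers typically rebuild a map from them, mirroring the `findEventData()` helper in the test above.

```java
import java.util.HashMap;
import java.util.Map;

import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.HistoryEventProto;
import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.KVPair;

class EventDataSketch {
  static Map<String, String> eventDataAsMap(HistoryEventProto proto) {
    Map<String, String> data = new HashMap<>();
    for (KVPair kv : proto.getEventDataList()) {
      data.put(kv.getKey(), kv.getValue());
    }
    return data; // e.g. data.get(ATSConstants.STATUS)
  }
}
```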
diff --git a/tez-plugins/tez-protobuf-history-plugin/src/test/java/org/apache/tez/dag/history/logging/proto/TestProtoHistoryLoggingService.java b/tez-plugins/tez-protobuf-history-plugin/src/test/java/org/apache/tez/dag/history/logging/proto/TestProtoHistoryLoggingService.java
new file mode 100644
index 0000000000..f599e61d2c
--- /dev/null
+++ b/tez-plugins/tez-protobuf-history-plugin/src/test/java/org/apache/tez/dag/history/logging/proto/TestProtoHistoryLoggingService.java
@@ -0,0 +1,371 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.history.logging.proto;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.time.LocalDate;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+
+import com.google.protobuf.CodedInputStream;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.util.Time;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.SystemClock;
+import org.apache.tez.common.VersionInfo;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.records.DAGProtos.DAGPlan;
+import org.apache.tez.dag.app.AppContext;
+import org.apache.tez.dag.app.dag.DAGState;
+import org.apache.tez.dag.history.DAGHistoryEvent;
+import org.apache.tez.dag.history.HistoryEventType;
+import org.apache.tez.dag.history.events.AppLaunchedEvent;
+import org.apache.tez.dag.history.events.DAGFinishedEvent;
+import org.apache.tez.dag.history.events.DAGInitializedEvent;
+import org.apache.tez.dag.history.events.DAGStartedEvent;
+import org.apache.tez.dag.history.events.DAGSubmittedEvent;
+import org.apache.tez.dag.history.events.TaskAttemptStartedEvent;
+import org.apache.tez.dag.history.events.TaskStartedEvent;
+import org.apache.tez.dag.history.events.VertexStartedEvent;
+import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.HistoryEventProto;
+import org.apache.tez.dag.history.logging.proto.HistoryLoggerProtos.ManifestEntryProto;
+import org.apache.tez.dag.records.TezDAGID;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.dag.records.TezTaskID;
+import org.apache.tez.dag.records.TezVertexID;
+import org.apache.tez.hadoop.shim.HadoopShim;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class TestProtoHistoryLoggingService {
+  private static ApplicationId appId = ApplicationId.newInstance(1000L, 1);
+  private static ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
+  private static String user = "TEST_USER";
+  private Clock clock;
+
+  @Rule
+  public TemporaryFolder tempFolder = new TemporaryFolder();
+
+  @Test
+  public void testService() throws Exception {
+    ProtoHistoryLoggingService service = createService(false);
+    service.start();
+    TezDAGID dagId = TezDAGID.getInstance(appId, 0);
+    List<HistoryEventProto> protos = new ArrayList<>();
+    for (DAGHistoryEvent event : makeHistoryEvents(dagId, service)) {
+      protos.add(new HistoryEventProtoConverter().convert(event.getHistoryEvent()));
+      service.handle(event);
+    }
+    service.stop();
+
+    TezProtoLoggers loggers = new TezProtoLoggers();
+    Assert.assertTrue(loggers.setup(service.getConfig(), clock));
+
+    // Verify dag events are logged.
+    DatePartitionedLogger<HistoryEventProto> dagLogger = loggers.getDagEventsLogger();
+    Path dagFilePath = dagLogger.getPathForDate(LocalDate.ofEpochDay(0), dagId + "_1");
+    ProtoMessageReader<HistoryEventProto> reader = dagLogger.getReader(dagFilePath);
+    assertEventsRead(reader, protos, 1, protos.size());
+    reader.close();
+
+    // Verify app events are logged.
+    DatePartitionedLogger<HistoryEventProto> appLogger = loggers.getAppEventsLogger();
+    Path appFilePath = appLogger.getPathForDate(LocalDate.ofEpochDay(0), attemptId.toString());
+    ProtoMessageReader<HistoryEventProto> appReader = appLogger.getReader(appFilePath);
+    long appOffset = appReader.getOffset();
+    Assert.assertEquals(protos.get(0), appReader.readEvent());
+    appReader.close();
+
+    // Verify manifest events are logged.
+    DatePartitionedLogger<ManifestEntryProto> manifestLogger = loggers.getManifestEventsLogger();
+    Path manifestFilePath = manifestLogger.getPathForDate(
+        LocalDate.ofEpochDay(0), attemptId.toString());
+    ProtoMessageReader<ManifestEntryProto> reader2 = manifestLogger.getReader(manifestFilePath);
+    ManifestEntryProto manifest = reader2.readEvent();
+    Assert.assertEquals(appId.toString(), manifest.getAppId());
+    Assert.assertEquals(dagId.toString(), manifest.getDagId());
+    Assert.assertEquals(dagFilePath.toString(), manifest.getDagFilePath());
+    Assert.assertEquals(appFilePath.toString(), manifest.getAppFilePath());
+    Assert.assertEquals(appOffset, manifest.getAppLaunchedEventOffset());
+
+    // Verify offsets in manifest logger.
+    reader = dagLogger.getReader(new Path(manifest.getDagFilePath()));
+    reader.setOffset(manifest.getDagSubmittedEventOffset());
+    HistoryEventProto evt = reader.readEvent();
+    Assert.assertNotNull(evt);
+    Assert.assertEquals(HistoryEventType.DAG_SUBMITTED.name(), evt.getEventType());
+
+    reader.setOffset(manifest.getDagFinishedEventOffset());
+    evt = reader.readEvent();
+    Assert.assertNotNull(evt);
+    Assert.assertEquals(HistoryEventType.DAG_FINISHED.name(), evt.getEventType());
+    reader.close();
+
+    // Verify manifest file scanner.
+    DagManifesFileScanner scanner = new DagManifesFileScanner(manifestLogger);
+    Assert.assertEquals(manifest, scanner.getNext());
+    Assert.assertNull(scanner.getNext());
+    scanner.close();
+  }
+
+  @Test
+  public void testProtoMessageSizeReset() throws Exception {
+    // This test confirms that cin.resetSizeCounter() was indeed called.
+    ProtoHistoryLoggingService service = createService(false);
+    service.start();
+    TezDAGID dagId = TezDAGID.getInstance(appId, 0);
+    List<HistoryEventProto> protos = new ArrayList<>();
+    for (DAGHistoryEvent event : makeHistoryEvents(dagId, service)) {
+      protos.add(new HistoryEventProtoConverter().convert(event.getHistoryEvent()));
+      service.handle(event);
+    }
+    service.stop();
+
+    TezProtoLoggers loggers = new TezProtoLoggers();
+    Assert.assertTrue(loggers.setup(service.getConfig(), clock));
+
+    // Verify dag events are logged.
+    DatePartitionedLogger<HistoryEventProto> dagLogger = loggers.getDagEventsLogger();
+    Path dagFilePath = dagLogger.getPathForDate(LocalDate.ofEpochDay(0), dagId + "_1");
+    try (ProtoMessageReader<HistoryEventProto> reader = dagLogger.getReader(dagFilePath)) {
+      assertEventsRead(reader, protos, 1, protos.size());
+
+      int totalBytesRead = getTotalBytesRead(reader);
+      // cin.resetSizeCounter() in ProtoMessageWritable.java ensures that totalBytesRead
+      // will always be 0. For reference, see the javadoc of CodedInputStream.
+      Assert.assertEquals(0, totalBytesRead);
+    }
+  }
+
+  private static int getTotalBytesRead(ProtoMessageReader<HistoryEventProto> reader)
+      throws NoSuchFieldException, IllegalAccessException {
+    // writable is a private field in ProtoMessageReader.java.
+    Field f = reader.getClass().getDeclaredField("writable");
+    f.setAccessible(true);
+    ProtoMessageWritable<?> writable = (ProtoMessageWritable<?>) f.get(reader);
+
+    // cin is a private field in ProtoMessageWritable.java.
+    Field c = writable.getClass().getDeclaredField("cin");
+    c.setAccessible(true);
+    CodedInputStream cin = (CodedInputStream) c.get(writable);
+
+    // Goal is to get the value of: reader.writable.cin.getTotalBytesRead()
+    return cin.getTotalBytesRead();
+  }
+
+  @Test
+  public void testServiceSplitEvents() throws Exception {
+    ProtoHistoryLoggingService service = createService(true);
+    service.start();
+    TezDAGID dagId = TezDAGID.getInstance(appId, 0);
+    List<HistoryEventProto> protos = new ArrayList<>();
+    for (DAGHistoryEvent event : makeHistoryEvents(dagId, service)) {
+      protos.add(new HistoryEventProtoConverter().convert(event.getHistoryEvent()));
+      service.handle(event);
+    }
+    service.stop();
+
+    TezProtoLoggers loggers = new TezProtoLoggers();
+    Assert.assertTrue(loggers.setup(service.getConfig(), clock));
+
+    // Verify dag events are logged.
+    DatePartitionedLogger<HistoryEventProto> dagLogger = loggers.getDagEventsLogger();
+    Path dagFilePath1 = dagLogger.getPathForDate(LocalDate.ofEpochDay(0), dagId + "_1");
+    Path dagFilePath2 = dagLogger.getPathForDate(LocalDate.ofEpochDay(0), dagId + "_1" +
+        ProtoHistoryLoggingService.SPLIT_DAG_EVENTS_FILE_SUFFIX);
+
+    try (ProtoMessageReader<HistoryEventProto> reader = dagLogger.getReader(dagFilePath1)) {
+      assertEventsRead(reader, protos, 1, 1 + 3);
+    }
+
+    try (ProtoMessageReader<HistoryEventProto> reader = dagLogger.getReader(dagFilePath2)) {
+      assertEventsRead(reader, protos, 4, protos.size());
+    }
+
+    // Verify app events are logged.
+    DatePartitionedLogger<HistoryEventProto> appLogger = loggers.getAppEventsLogger();
+    Path appFilePath = appLogger.getPathForDate(LocalDate.ofEpochDay(0), attemptId.toString());
+    ProtoMessageReader<HistoryEventProto> appReader = appLogger.getReader(appFilePath);
+    long appOffset = appReader.getOffset();
+    Assert.assertEquals(protos.get(0), appReader.readEvent());
+    appReader.close();
+
+    // Verify manifest events are logged.
+    DatePartitionedLogger<ManifestEntryProto> manifestLogger = loggers.getManifestEventsLogger();
+    DagManifesFileScanner scanner = new DagManifesFileScanner(manifestLogger);
+    Path manifestFilePath = manifestLogger.getPathForDate(
+        LocalDate.ofEpochDay(0), attemptId.toString());
+    ProtoMessageReader<ManifestEntryProto> manifestReader = manifestLogger.getReader(
+        manifestFilePath);
+    ManifestEntryProto manifest = manifestReader.readEvent();
+    Assert.assertEquals(manifest, scanner.getNext());
+    Assert.assertEquals(appId.toString(), manifest.getAppId());
+    Assert.assertEquals(dagId.toString(), manifest.getDagId());
+    Assert.assertEquals(dagFilePath1.toString(), manifest.getDagFilePath());
+    Assert.assertEquals(appFilePath.toString(), manifest.getAppFilePath());
+    Assert.assertEquals(appOffset, manifest.getAppLaunchedEventOffset());
+    Assert.assertEquals(-1, manifest.getDagFinishedEventOffset());
+
+    HistoryEventProto evt = null;
+    // Verify offsets in manifest logger.
+    try (ProtoMessageReader<HistoryEventProto> reader = dagLogger.getReader(
+        new Path(manifest.getDagFilePath()))) {
+      reader.setOffset(manifest.getDagSubmittedEventOffset());
+      evt = reader.readEvent();
+      Assert.assertNotNull(evt);
+      Assert.assertEquals(HistoryEventType.DAG_SUBMITTED.name(), evt.getEventType());
+    }
+
+    manifest = manifestReader.readEvent();
+    Assert.assertEquals(manifest, scanner.getNext());
+    Assert.assertEquals(appId.toString(), manifest.getAppId());
+    Assert.assertEquals(dagId.toString(), manifest.getDagId());
+    Assert.assertEquals(dagFilePath2.toString(), manifest.getDagFilePath());
+    Assert.assertEquals(appFilePath.toString(), manifest.getAppFilePath());
+    Assert.assertEquals(appOffset, manifest.getAppLaunchedEventOffset());
+    Assert.assertEquals(-1, manifest.getDagSubmittedEventOffset());
+
+    try (ProtoMessageReader<HistoryEventProto> reader = dagLogger.getReader(
+        new Path(manifest.getDagFilePath()))) {
+      reader.setOffset(manifest.getDagFinishedEventOffset());
+      evt = reader.readEvent();
+      Assert.assertNotNull(evt);
+      Assert.assertEquals(HistoryEventType.DAG_FINISHED.name(), evt.getEventType());
+    }
+
+    // Verify manifest file scanner.
+    Assert.assertNull(scanner.getNext());
+    scanner.close();
+  }
+
+  @Test
+  public void testDirPermissions() throws IOException {
+    Path basePath = new Path(tempFolder.newFolder().getAbsolutePath());
+    Configuration conf = new Configuration();
+    FileSystem fs = basePath.getFileSystem(conf);
+    FsPermission expectedPermissions = FsPermission.createImmutable((short) 01777);
+
+    // Check that the directory already exists and doesn't have the expected permissions.
+    Assert.assertTrue(fs.exists(basePath));
+    Assert.assertNotEquals(expectedPermissions, fs.getFileStatus(basePath).getPermission());
+
+    new DatePartitionedLogger<>(HistoryEventProto.PARSER, basePath, conf, new FixedClock(Time.now()));
+
+    // Check the permissions; they should now match the expected permissions.
+    Assert.assertEquals(expectedPermissions, fs.getFileStatus(basePath).getPermission());
+  }
+
+  private List<DAGHistoryEvent> makeHistoryEvents(TezDAGID dagId,
+      ProtoHistoryLoggingService service) {
+    List<DAGHistoryEvent> historyEvents = new ArrayList<>();
+    DAGPlan dagPlan = DAGPlan.newBuilder().setName("DAGPlanMock").build();
+
+    long time = System.currentTimeMillis();
+    Configuration conf = new Configuration(service.getConfig());
+    historyEvents.add(new DAGHistoryEvent(null, new AppLaunchedEvent(appId, time, time, user, conf,
+        new VersionInfo("component", "1.1.0", "rev1", "20120101", "git.apache.org") {})));
+    historyEvents.add(new DAGHistoryEvent(dagId, new DAGSubmittedEvent(dagId, time,
+        DAGPlan.getDefaultInstance(), attemptId, null, user, conf, null, "default")));
+    historyEvents.add(new DAGHistoryEvent(dagId, new DAGInitializedEvent(dagId, time + 1, user,
+        "test_dag", Collections.emptyMap())));
+    historyEvents.add(new DAGHistoryEvent(dagId, new DAGStartedEvent(dagId, time + 2, user,
+        "test_dag")));
+
+    TezVertexID vertexID = TezVertexID.getInstance(dagId, 1);
+    historyEvents.add(new DAGHistoryEvent(dagId, new VertexStartedEvent(vertexID, time, time)));
+    TezTaskID tezTaskID = TezTaskID.getInstance(vertexID, 1);
+    historyEvents
+        .add(new DAGHistoryEvent(dagId, new TaskStartedEvent(tezTaskID, "test", time, time)));
+    historyEvents.add(new DAGHistoryEvent(dagId,
+        new TaskAttemptStartedEvent(TezTaskAttemptID.getInstance(tezTaskID, 1), "test", time,
+            ContainerId.newContainerId(attemptId, 1), NodeId.newInstance("localhost", 8765), null,
+            null, null)));
time, + DAGState.ERROR, "diagnostics", null, user, dagPlan.getName(), + new HashMap<>(), attemptId, dagPlan))); + return historyEvents; + } + + private static class FixedClock implements Clock { + final Clock clock = SystemClock.getInstance(); + final long diff; + + public FixedClock(long startTime) { + diff = clock.getTime() - startTime; + } + + @Override + public long getTime() { + return clock.getTime() - diff; + } + } + + private ProtoHistoryLoggingService createService(boolean splitEvents) throws IOException { + ProtoHistoryLoggingService service = new ProtoHistoryLoggingService(); + clock = new FixedClock(0); // Start time is always the first day; easier to write tests. + AppContext appContext = mock(AppContext.class); + when(appContext.getApplicationID()).thenReturn(appId); + when(appContext.getApplicationAttemptId()).thenReturn(attemptId); + when(appContext.getUser()).thenReturn(user); + when(appContext.getHadoopShim()).thenReturn(new HadoopShim() {}); + when(appContext.getClock()).thenReturn(clock); + service.setAppContext(appContext); + Configuration conf = new Configuration(false); + String basePath = tempFolder.newFolder().getAbsolutePath(); + conf.set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem"); + conf.set(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_BASE_DIR, basePath); + conf.setBoolean(TezConfiguration.TEZ_HISTORY_LOGGING_PROTO_SPLIT_DAG_START, splitEvents); + service.init(conf); + return service; + } + + private void assertEventsRead(ProtoMessageReader<HistoryEventProto> reader, + List<HistoryEventProto> protos, int start, int finish) throws Exception { + for (int i = start; i < finish; ++i) { + try { + HistoryEventProto evt = reader.readEvent(); + Assert.assertEquals(protos.get(i), evt); + } catch (EOFException e) { + Assert.fail("Unexpected EOF"); + } + } + try { + HistoryEventProto evt = reader.readEvent(); + Assert.assertNull(evt); + } catch (EOFException e) { + // Expected.
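+ // readEvent() signals end-of-file either by returning null or by throwing EOFException,
+ // depending on how the underlying stream terminates, so both outcomes are accepted here.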
+ } + } +} diff --git a/tez-plugins/tez-yarn-timeline-cache-plugin/pom.xml b/tez-plugins/tez-yarn-timeline-cache-plugin/pom.xml index 99af09989f..e3d61c6df3 100644 --- a/tez-plugins/tez-yarn-timeline-cache-plugin/pom.xml +++ b/tez-plugins/tez-yarn-timeline-cache-plugin/pom.xml @@ -19,7 +19,7 @@ <groupId>org.apache.tez</groupId> <artifactId>tez-plugins</artifactId> - <version>0.9.1-SNAPSHOT</version> + <version>0.10.5-SNAPSHOT</version> </parent> <artifactId>tez-yarn-timeline-cache-plugin</artifactId> diff --git a/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/java/org/apache/tez/dag/history/logging/ats/TimelineCachePluginImpl.java b/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/java/org/apache/tez/dag/history/logging/ats/TimelineCachePluginImpl.java index d211feb0e3..2ee2c21b79 100644 --- a/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/java/org/apache/tez/dag/history/logging/ats/TimelineCachePluginImpl.java +++ b/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/java/org/apache/tez/dag/history/logging/ats/TimelineCachePluginImpl.java @@ -94,18 +94,18 @@ private Set<TimelineEntityGroupId> convertToTimelineEntityGroupIds(String entity } else if (entityType.equals(EntityTypes.TEZ_VERTEX_ID.name())) { TezVertexID vertexID = TezVertexID.fromString(entityId); if (vertexID != null) { - return createTimelineEntityGroupIds(vertexID.getDAGId()); + return createTimelineEntityGroupIds(vertexID.getDAGID()); } } else if (entityType.equals(EntityTypes.TEZ_TASK_ID.name())) { TezTaskID taskID = TezTaskID.fromString(entityId); if (taskID != null) { - return createTimelineEntityGroupIds(taskID.getVertexID().getDAGId()); + return createTimelineEntityGroupIds(taskID.getDAGID()); } } else if (entityType.equals(EntityTypes.TEZ_TASK_ATTEMPT_ID.name())) { TezTaskAttemptID taskAttemptID = TezTaskAttemptID.fromString(entityId); if (taskAttemptID != null) { - return createTimelineEntityGroupIds(taskAttemptID.getTaskID().getVertexID().getDAGId()); + return createTimelineEntityGroupIds(taskAttemptID.getDAGID()); } } else if (entityType.equals(EntityTypes.TEZ_CONTAINER_ID.name())) { String cId = entityId; diff --git a/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/javadoc/resources/META-INF/LICENSE similarity index 100% rename from tez-plugins/tez-yarn-timeline-cache-plugin/src/main/javadoc/resources/META-INF/LICENSE.txt rename to tez-plugins/tez-yarn-timeline-cache-plugin/src/main/javadoc/resources/META-INF/LICENSE diff --git a/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/javadoc/resources/META-INF/NOTICE b/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/javadoc/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/javadoc/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/javadoc/resources/META-INF/NOTICE.txt b/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/javadoc/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/javadoc/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/).
- diff --git a/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/resources/META-INF/LICENSE.txt b/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/resources/META-INF/LICENSE similarity index 100% rename from tez-plugins/tez-yarn-timeline-cache-plugin/src/main/resources/META-INF/LICENSE.txt rename to tez-plugins/tez-yarn-timeline-cache-plugin/src/main/resources/META-INF/LICENSE diff --git a/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/resources/META-INF/NOTICE b/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/resources/META-INF/NOTICE.txt b/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-plugins/tez-yarn-timeline-cache-plugin/src/main/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml b/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml index 791f4e410c..4e3fcf25e8 100644 --- a/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml +++ b/tez-plugins/tez-yarn-timeline-history-with-acls/pom.xml @@ -19,7 +19,7 @@ <groupId>org.apache.tez</groupId> <artifactId>tez-plugins</artifactId> - <version>0.9.1-SNAPSHOT</version> + <version>0.10.5-SNAPSHOT</version> </parent> <artifactId>tez-yarn-timeline-history-with-acls</artifactId> @@ -126,7 +126,7 @@ <dependency> <groupId>org.mockito</groupId> - <artifactId>mockito-all</artifactId> + <artifactId>mockito-core</artifactId> <scope>test</scope> @@ -139,6 +139,10 @@ <artifactId>jersey-json</artifactId> <scope>test</scope> </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-mapreduce-client-shuffle</artifactId> + </dependency> diff --git a/tez-plugins/tez-yarn-timeline-history-with-acls/src/test/java/org/apache/tez/dag/history/ats/acls/TestATSHistoryWithACLs.java b/tez-plugins/tez-yarn-timeline-history-with-acls/src/test/java/org/apache/tez/dag/history/ats/acls/TestATSHistoryWithACLs.java index 8e5c95c36f..f284fe4049 100644 --- a/tez-plugins/tez-yarn-timeline-history-with-acls/src/test/java/org/apache/tez/dag/history/ats/acls/TestATSHistoryWithACLs.java +++ b/tez-plugins/tez-yarn-timeline-history-with-acls/src/test/java/org/apache/tez/dag/history/ats/acls/TestATSHistoryWithACLs.java @@ -25,8 +25,10 @@ import static org.mockito.Mockito.*; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.List; import java.util.Random; import javax.ws.rs.core.MediaType; @@ -42,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.timeline.TimelineDomain; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity; +import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.tez.client.TezClient; import org.apache.tez.common.ReflectionUtils; @@ -63,6 +66,9 @@ import org.apache.tez.runtime.library.processor.SleepProcessor; import org.apache.tez.runtime.library.processor.SleepProcessor.SleepProcessorConfig; import org.apache.tez.tests.MiniTezClusterWithTimeline; +import org.codehaus.jettison.json.JSONArray; +import org.codehaus.jettison.json.JSONException; +import
org.codehaus.jettison.json.JSONObject; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -147,11 +153,50 @@ private <K> K getTimelineData(String url, Class<K> clazz) { ClientResponse response = resource.accept(MediaType.APPLICATION_JSON) .get(ClientResponse.class); assertEquals(200, response.getStatus()); - assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType()); + assertTrue(MediaType.APPLICATION_JSON_TYPE.isCompatible(response.getType())); - K entity = response.getEntity(clazz); - assertNotNull(entity); - return entity; + JSONObject entity = response.getEntity(JSONObject.class); + K converted = null; + try { + converted = convertJSONObjectToTimelineObject(entity, clazz); + } catch (JSONException e) { + throw new RuntimeException(e); + } + assertNotNull(converted); + return converted; + } + + private <K> K convertJSONObjectToTimelineObject(JSONObject jsonObj, Class<K> clazz) throws JSONException { + LOG.info("convertJSONObjectToTimelineObject got object: " + jsonObj); + if (clazz == TimelineDomain.class) { + TimelineDomain domain = new TimelineDomain(); + domain.setId(jsonObj.getString("id")); + domain.setOwner(jsonObj.getString("owner")); + domain.setReaders(jsonObj.getString("readers")); + domain.setWriters(jsonObj.getString("writers")); + return (K) domain; + } else if (clazz == TimelineEntity.class) { + TimelineEntity entity = new TimelineEntity(); + entity.setEntityId(jsonObj.getString("entity")); + entity.setEntityType(jsonObj.getString("entitytype")); + entity.setDomainId(jsonObj.getString("domain")); + entity.setEvents(getEventsFromJSON(jsonObj)); + return (K) entity; + } else { + throw new RuntimeException( + "convertJSONObjectToTimelineObject doesn't support conversion from JSONObject to " + clazz); + } + } + + private List<TimelineEvent> getEventsFromJSON(JSONObject jsonObj) throws JSONException { + List<TimelineEvent> events = new ArrayList<>(); + JSONArray arrEvents = jsonObj.getJSONArray("events"); + for (int i = 0; i < arrEvents.length(); i++) { + TimelineEvent event = new TimelineEvent(); + event.setEventType(((JSONObject) arrEvents.get(i)).getString("eventtype")); + events.add(event); + } + return events; + } private TimelineDomain getDomain(String domainId) { @@ -406,7 +451,7 @@ public void testDagLoggingDisabled() throws Exception { DAGHistoryEvent event = new DAGHistoryEvent(tezDAGID, submittedEvent); historyLoggingService.handle(new DAGHistoryEvent(tezDAGID, submittedEvent)); Thread.sleep(1000l); - String url = "http://" + timelineAddress + "/ws/v1/timeline/TEZ_DAG_ID/"+event.getDagID(); + String url = "http://" + timelineAddress + "/ws/v1/timeline/TEZ_DAG_ID/"+event.getDAGID(); Client client = new Client(); WebResource resource = client.resource(url); @@ -451,15 +496,16 @@ public void testDagLoggingEnabled() throws Exception { DAGHistoryEvent event = new DAGHistoryEvent(tezDAGID, submittedEvent); historyLoggingService.handle(new DAGHistoryEvent(tezDAGID, submittedEvent)); Thread.sleep(1000l); - String url = "http://" + timelineAddress + "/ws/v1/timeline/TEZ_DAG_ID/"+event.getDagID(); + String url = "http://" + timelineAddress + "/ws/v1/timeline/TEZ_DAG_ID/"+event.getDAGID(); Client client = new Client(); WebResource resource = client.resource(url); ClientResponse response = resource.accept(MediaType.APPLICATION_JSON) .get(ClientResponse.class); assertEquals(200, response.getStatus()); - assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType()); - TimelineEntity entity = response.getEntity(TimelineEntity.class); +
assertTrue(MediaType.APPLICATION_JSON_TYPE.isCompatible(response.getType())); + JSONObject entityJson = response.getEntity(JSONObject.class); + TimelineEntity entity = convertJSONObjectToTimelineObject(entityJson, TimelineEntity.class); assertEquals(entity.getEntityType(), "TEZ_DAG_ID"); assertEquals(entity.getEvents().get(0).getEventType(), HistoryEventType.DAG_SUBMITTED.toString()); } diff --git a/tez-plugins/tez-yarn-timeline-history-with-fs/pom.xml b/tez-plugins/tez-yarn-timeline-history-with-fs/pom.xml index 627e72d147..024bb2408f 100644 --- a/tez-plugins/tez-yarn-timeline-history-with-fs/pom.xml +++ b/tez-plugins/tez-yarn-timeline-history-with-fs/pom.xml @@ -19,7 +19,7 @@ org.apache.tez tez-plugins - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-yarn-timeline-history-with-fs @@ -36,6 +36,10 @@ org.apache.tez tez-dag + + org.apache.tez + hadoop-shim + org.apache.tez tez-yarn-timeline-history @@ -112,6 +116,10 @@ test test-jar + + org.apache.hadoop + hadoop-mapreduce-client-shuffle + org.codehaus.jettison jettison @@ -126,7 +134,7 @@ org.mockito - mockito-all + mockito-core test @@ -151,4 +159,3 @@ - diff --git a/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/java/org/apache/tez/dag/history/logging/ats/ATSV15HistoryLoggingService.java b/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/java/org/apache/tez/dag/history/logging/ats/ATSV15HistoryLoggingService.java index a71f0d8db0..5068fb77db 100644 --- a/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/java/org/apache/tez/dag/history/logging/ats/ATSV15HistoryLoggingService.java +++ b/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/java/org/apache/tez/dag/history/logging/ats/ATSV15HistoryLoggingService.java @@ -311,9 +311,9 @@ public TimelineEntityGroupId getGroupId(DAGHistoryEvent event) { case VERTEX_GROUP_COMMIT_FINISHED: case DAG_RECOVERED: String entityGroupId = numDagsPerGroup > 1 - ? event.getDagID().getGroupId(numDagsPerGroup) - : event.getDagID().toString(); - return TimelineEntityGroupId.newInstance(event.getDagID().getApplicationId(), entityGroupId); + ? event.getDAGID().getGroupId(numDagsPerGroup) + : event.getDAGID().toString(); + return TimelineEntityGroupId.newInstance(event.getApplicationId(), entityGroupId); case APP_LAUNCHED: case AM_LAUNCHED: case AM_STARTED: @@ -333,7 +333,7 @@ public void handle(DAGHistoryEvent event) { private boolean isValidEvent(DAGHistoryEvent event) { HistoryEventType eventType = event.getHistoryEvent().getEventType(); - TezDAGID dagId = event.getDagID(); + TezDAGID dagId = event.getDAGID(); if (eventType.equals(HistoryEventType.DAG_SUBMITTED)) { DAGSubmittedEvent dagSubmittedEvent = @@ -373,7 +373,7 @@ private boolean isValidEvent(DAGHistoryEvent event) { private void handleEvents(DAGHistoryEvent event) { String domainId = getDomainForEvent(event); // skippedDags is updated in the above call so check again. 
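// (getDomainForEvent() may add the DAG to skippedDAGs, e.g. when its ACL domain cannot be created, hence the re-check.)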
- if (event.getDagID() != null && skippedDAGs.contains(event.getDagID())) { + if (event.getDAGID() != null && skippedDAGs.contains(event.getDAGID())) { return; } TimelineEntityGroupId groupId = getGroupId(event); @@ -417,7 +417,7 @@ private String getDomainForEvent(DAGHistoryEvent event) { return domainId; } - TezDAGID dagId = event.getDagID(); + TezDAGID dagId = event.getDAGID(); HistoryEvent historyEvent = event.getHistoryEvent(); if (dagId == null || !HistoryEventType.isDAGSpecificEvent(historyEvent.getEventType())) { return domainId; diff --git a/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/javadoc/resources/META-INF/LICENSE similarity index 100% rename from tez-plugins/tez-yarn-timeline-history-with-fs/src/main/javadoc/resources/META-INF/LICENSE.txt rename to tez-plugins/tez-yarn-timeline-history-with-fs/src/main/javadoc/resources/META-INF/LICENSE diff --git a/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/javadoc/resources/META-INF/NOTICE b/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/javadoc/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/javadoc/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/javadoc/resources/META-INF/NOTICE.txt b/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/javadoc/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/javadoc/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/resources/META-INF/LICENSE.txt b/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/resources/META-INF/LICENSE similarity index 100% rename from tez-plugins/tez-yarn-timeline-history-with-fs/src/main/resources/META-INF/LICENSE.txt rename to tez-plugins/tez-yarn-timeline-history-with-fs/src/main/resources/META-INF/LICENSE diff --git a/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/resources/META-INF/NOTICE b/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/resources/META-INF/NOTICE.txt b/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-plugins/tez-yarn-timeline-history-with-fs/src/main/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
- diff --git a/tez-plugins/tez-yarn-timeline-history-with-fs/src/test/java/org/apache/tez/dag/history/ats/acls/TestATSHistoryV15.java b/tez-plugins/tez-yarn-timeline-history-with-fs/src/test/java/org/apache/tez/dag/history/ats/acls/TestATSHistoryV15.java index a690a1903d..f49e588afe 100644 --- a/tez-plugins/tez-yarn-timeline-history-with-fs/src/test/java/org/apache/tez/dag/history/ats/acls/TestATSHistoryV15.java +++ b/tez-plugins/tez-yarn-timeline-history-with-fs/src/test/java/org/apache/tez/dag/history/ats/acls/TestATSHistoryV15.java @@ -20,9 +20,10 @@ import static org.junit.Assert.assertEquals; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; + import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.util.Random; import org.apache.hadoop.fs.LocatedFileStatus; @@ -32,8 +33,6 @@ import org.apache.tez.dag.app.AppContext; import org.apache.tez.dag.history.HistoryEvent; import org.apache.tez.dag.history.logging.ats.ATSV15HistoryLoggingService; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -60,6 +59,8 @@ import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -97,6 +98,7 @@ public static void setup() throws IOException { 1, 1, 1, true); Configuration conf = new Configuration(); conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true); + conf.set(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_PATH, TEST_ROOT_DIR); conf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS conf.setInt("yarn.nodemanager.delete.debug-delay-sec", 20000); atsActivePath = new Path("/tmp/ats/active/" + random.nextInt(100000)); @@ -288,12 +290,12 @@ public boolean isHistoryEvent() { } @Override - public void toProtoStream(OutputStream outputStream) throws IOException { + public void toProtoStream(CodedOutputStream outputStream) throws IOException { } @Override - public void fromProtoStream(InputStream inputStream) throws IOException { + public void fromProtoStream(CodedInputStream inputStream) throws IOException { } }; diff --git a/tez-plugins/tez-yarn-timeline-history-with-fs/src/test/java/org/apache/tez/dag/history/logging/ats/TestATSV15HistoryLoggingService.java b/tez-plugins/tez-yarn-timeline-history-with-fs/src/test/java/org/apache/tez/dag/history/logging/ats/TestATSV15HistoryLoggingService.java index 96c3c80931..7ba14fffef 100644 --- a/tez-plugins/tez-yarn-timeline-history-with-fs/src/test/java/org/apache/tez/dag/history/logging/ats/TestATSV15HistoryLoggingService.java +++ b/tez-plugins/tez-yarn-timeline-history-with-fs/src/test/java/org/apache/tez/dag/history/logging/ats/TestATSV15HistoryLoggingService.java @@ -24,10 +24,8 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.anyObject; -import static org.mockito.Matchers.anyString; -import static org.mockito.Matchers.anyVararg; import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -42,7 +40,6 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.security.token.Token; import 
org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -53,7 +50,6 @@ import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse; import org.apache.hadoop.yarn.client.api.TimelineClient; import org.apache.hadoop.yarn.exceptions.YarnException; -import org.apache.hadoop.yarn.security.client.TimelineDelegationTokenIdentifier; import org.apache.tez.common.security.DAGAccessControls; import org.apache.tez.common.security.HistoryACLPolicyManager; import org.apache.tez.dag.api.TezConfiguration; @@ -71,7 +67,6 @@ import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.hadoop.shim.HadoopShim; import org.junit.Test; -import org.mockito.Matchers; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -228,7 +223,8 @@ public void testNonSessionDomains() throws Exception { .setupSessionDAGACLs((Configuration)any(), eq(appId), eq("0"), (DAGAccessControls)any()); // All calls made with session domain id. - verify(historyACLPolicyManager, times(5)).updateTimelineEntityDomain(any(), eq("session-id")); + // NOTE: Expect 6 invocations for 5 history events because DAG_SUBMITTED becomes two separate timeline events. + verify(historyACLPolicyManager, times(6)).updateTimelineEntityDomain(any(), eq("session-id")); assertTrue(entityLog.size() > 0); service.stop(); @@ -447,17 +443,17 @@ private ATSV15HistoryLoggingService createService(int numDagsPerGroup) throws IO entityLog = new HashMap<>(); //timelineClient.init(conf); when(timelineClient.getDelegationToken(anyString())).thenReturn(null); - when(timelineClient.renewDelegationToken(Matchers.<Token<TimelineDelegationTokenIdentifier>>any())).thenReturn(0L); - when(timelineClient.putEntities(Matchers.anyVararg())).thenAnswer(new Answer<TimelinePutResponse>() { + when(timelineClient.renewDelegationToken(any())).thenReturn(0L); + when(timelineClient.putEntities(any())).thenAnswer(new Answer<TimelinePutResponse>() { @Override public TimelinePutResponse answer(InvocationOnMock invocation) throws Throwable { return putEntityHelper(DEFAULT_GROUP_ID, invocation.getArguments(), 0); } }); - when(timelineClient.putEntities(any(ApplicationAttemptId.class), any(TimelineEntityGroupId.class), Matchers.anyVararg())).thenAnswer(new Answer<TimelinePutResponse>() { + when(timelineClient.putEntities(any(), any(), any())).thenAnswer(new Answer<TimelinePutResponse>() { @Override public TimelinePutResponse answer(InvocationOnMock invocation) throws Throwable { - return putEntityHelper(invocation.getArgumentAt(1, TimelineEntityGroupId.class), invocation.getArguments(), 2); + return putEntityHelper(invocation.getArgument(1, TimelineEntityGroupId.class), invocation.getArguments(), 2); } }); service.timelineClient = timelineClient; diff --git a/tez-plugins/tez-yarn-timeline-history/pom.xml b/tez-plugins/tez-yarn-timeline-history/pom.xml index 2d4192c52b..ff7d114eee 100644 --- a/tez-plugins/tez-yarn-timeline-history/pom.xml +++ b/tez-plugins/tez-yarn-timeline-history/pom.xml @@ -19,7 +19,7 @@ <groupId>org.apache.tez</groupId> <artifactId>tez-plugins</artifactId> - <version>0.9.1-SNAPSHOT</version> + <version>0.10.5-SNAPSHOT</version> </parent> <artifactId>tez-yarn-timeline-history</artifactId> @@ -120,7 +120,7 @@ <dependency> <groupId>org.mockito</groupId> - <artifactId>mockito-all</artifactId> + <artifactId>mockito-core</artifactId> <scope>test</scope> diff --git a/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/ATSHistoryLoggingService.java b/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/ATSHistoryLoggingService.java index 6d035cce2c..c1883a9216 100ddd ---
a/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/ATSHistoryLoggingService.java +++ b/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/ATSHistoryLoggingService.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -291,7 +291,7 @@ public void handle(DAGHistoryEvent event) { private boolean isValidEvent(DAGHistoryEvent event) { HistoryEventType eventType = event.getHistoryEvent().getEventType(); - TezDAGID dagId = event.getDagID(); + TezDAGID dagId = event.getDAGID(); if (eventType.equals(HistoryEventType.DAG_SUBMITTED)) { DAGSubmittedEvent dagSubmittedEvent = @@ -333,7 +333,7 @@ private void handleEvents(List events) { for (DAGHistoryEvent event : events) { String domainId = getDomainForEvent(event); // skippedDags is updated in the above call so check again. - if (event.getDagID() != null && skippedDAGs.contains(event.getDagID())) { + if (event.getDAGID() != null && skippedDAGs.contains(event.getDAGID())) { continue; } List eventEntities = HistoryEventTimelineConversion.convertToTimelineEntities( @@ -377,7 +377,7 @@ private String getDomainForEvent(DAGHistoryEvent event) { return domainId; } - TezDAGID dagId = event.getDagID(); + TezDAGID dagId = event.getDAGID(); HistoryEvent historyEvent = event.getHistoryEvent(); if (dagId == null || !HistoryEventType.isDAGSpecificEvent(historyEvent.getEventType())) { return domainId; diff --git a/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/HistoryEventTimelineConversion.java b/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/HistoryEventTimelineConversion.java index 235a29299c..49c78eab7e 100644 --- a/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/HistoryEventTimelineConversion.java +++ b/tez-plugins/tez-yarn-timeline-history/src/main/java/org/apache/tez/dag/history/logging/ats/HistoryEventTimelineConversion.java @@ -32,7 +32,6 @@ import org.apache.tez.dag.api.EdgeProperty; import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.api.oldrecords.TaskAttemptState; -import org.apache.tez.dag.api.oldrecords.TaskState; import org.apache.tez.dag.api.records.DAGProtos.CallerContextProto; import org.apache.tez.dag.app.web.AMWebController; import org.apache.tez.dag.history.HistoryEvent; @@ -61,7 +60,9 @@ import com.google.common.collect.Lists; -public class HistoryEventTimelineConversion { +public final class HistoryEventTimelineConversion { + + private HistoryEventTimelineConversion() {} private static void validateEvent(HistoryEvent event) { if (!event.isHistoryEvent()) { @@ -87,8 +88,8 @@ public static List convertToTimelineEntities(HistoryEvent histor convertContainerStoppedEvent((ContainerStoppedEvent) historyEvent)); case DAG_SUBMITTED: return Lists.newArrayList( - convertDAGSubmittedToDAGExtraInfoEntity((DAGSubmittedEvent)historyEvent), - convertDAGSubmittedEvent((DAGSubmittedEvent)historyEvent)); + convertDAGSubmittedEvent((DAGSubmittedEvent)historyEvent), + convertDAGSubmittedToDAGExtraInfoEntity((DAGSubmittedEvent)historyEvent)); case DAG_INITIALIZED: return Collections.singletonList( convertDAGInitializedEvent((DAGInitializedEvent) historyEvent)); @@ -96,8 +97,8 @@ public static List convertToTimelineEntities(HistoryEvent histor return 
Collections.singletonList(convertDAGStartedEvent((DAGStartedEvent) historyEvent)); case DAG_FINISHED: return Lists.newArrayList( - convertDAGFinishedToDAGExtraInfoEntity((DAGFinishedEvent) historyEvent), - convertDAGFinishedEvent((DAGFinishedEvent) historyEvent)); + convertDAGFinishedEvent((DAGFinishedEvent) historyEvent), + convertDAGFinishedToDAGExtraInfoEntity((DAGFinishedEvent) historyEvent)); case VERTEX_INITIALIZED: return Collections.singletonList( convertVertexInitializedEvent((VertexInitializedEvent) historyEvent)); @@ -288,7 +289,7 @@ private static TimelineEntity convertContainerStoppedEvent(ContainerStoppedEvent private static TimelineEntity convertDAGFinishedEvent(DAGFinishedEvent event) { TimelineEntity atsEntity = new TimelineEntity(); - atsEntity.setEntityId(event.getDagID().toString()); + atsEntity.setEntityId(event.getDAGID().toString()); atsEntity.setEntityType(EntityTypes.TEZ_DAG_ID.name()); TimelineEvent finishEvt = new TimelineEvent(); @@ -298,7 +299,7 @@ private static TimelineEntity convertDAGFinishedEvent(DAGFinishedEvent event) { atsEntity.addPrimaryFilter(ATSConstants.USER, event.getUser()); atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getDagID().getApplicationId().toString()); + event.getApplicationId().toString()); atsEntity.addPrimaryFilter(ATSConstants.DAG_NAME, event.getDagName()); atsEntity.addPrimaryFilter(ATSConstants.STATUS, event.getState().name()); if (event.getDAGPlan().hasCallerContext() @@ -327,10 +328,10 @@ private static TimelineEntity convertDAGFinishedEvent(DAGFinishedEvent event) { private static TimelineEntity convertDAGFinishedToDAGExtraInfoEntity(DAGFinishedEvent event) { TimelineEntity atsEntity = new TimelineEntity(); - atsEntity.setEntityId(event.getDagID().toString()); + atsEntity.setEntityId(event.getDAGID().toString()); atsEntity.setEntityType(EntityTypes.TEZ_DAG_EXTRA_INFO.name()); - atsEntity.addRelatedEntity(EntityTypes.TEZ_DAG_ID.name(), event.getDagID().toString()); + atsEntity.addRelatedEntity(EntityTypes.TEZ_DAG_ID.name(), event.getDAGID().toString()); TimelineEvent submitEvt = new TimelineEvent(); submitEvt.setEventType(HistoryEventType.DAG_FINISHED.name()); @@ -345,7 +346,7 @@ private static TimelineEntity convertDAGFinishedToDAGExtraInfoEntity(DAGFinished private static TimelineEntity convertDAGInitializedEvent(DAGInitializedEvent event) { TimelineEntity atsEntity = new TimelineEntity(); - atsEntity.setEntityId(event.getDagID().toString()); + atsEntity.setEntityId(event.getDAGID().toString()); atsEntity.setEntityType(EntityTypes.TEZ_DAG_ID.name()); TimelineEvent initEvt = new TimelineEvent(); @@ -355,13 +356,13 @@ private static TimelineEntity convertDAGInitializedEvent(DAGInitializedEvent eve atsEntity.addPrimaryFilter(ATSConstants.USER, event.getUser()); atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getDagID().getApplicationId().toString()); + event.getApplicationId().toString()); atsEntity.addPrimaryFilter(ATSConstants.DAG_NAME, event.getDagName()); atsEntity.addOtherInfo(ATSConstants.INIT_TIME, event.getInitTime()); if (event.getVertexNameIDMap() != null) { - Map nameIdStrMap = new TreeMap(); + Map nameIdStrMap = new TreeMap<>(); for (Entry entry : event.getVertexNameIDMap().entrySet()) { nameIdStrMap.put(entry.getKey(), entry.getValue().toString()); } @@ -373,7 +374,7 @@ private static TimelineEntity convertDAGInitializedEvent(DAGInitializedEvent eve private static TimelineEntity convertDAGStartedEvent(DAGStartedEvent event) { TimelineEntity atsEntity = new TimelineEntity(); - 
atsEntity.setEntityId(event.getDagID().toString()); + atsEntity.setEntityId(event.getDAGID().toString()); atsEntity.setEntityType(EntityTypes.TEZ_DAG_ID.name()); TimelineEvent startEvt = new TimelineEvent(); @@ -383,7 +384,7 @@ private static TimelineEntity convertDAGStartedEvent(DAGStartedEvent event) { atsEntity.addPrimaryFilter(ATSConstants.USER, event.getUser()); atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getDagID().getApplicationId().toString()); + event.getApplicationId().toString()); atsEntity.addPrimaryFilter(ATSConstants.DAG_NAME, event.getDagName()); atsEntity.addOtherInfo(ATSConstants.START_TIME, event.getStartTime()); @@ -394,7 +395,7 @@ private static TimelineEntity convertDAGStartedEvent(DAGStartedEvent event) { private static TimelineEntity convertDAGSubmittedEvent(DAGSubmittedEvent event) { TimelineEntity atsEntity = new TimelineEntity(); - atsEntity.setEntityId(event.getDagID().toString()); + atsEntity.setEntityId(event.getDAGID().toString()); atsEntity.setEntityType(EntityTypes.TEZ_DAG_ID.name()); atsEntity.addRelatedEntity(EntityTypes.TEZ_APPLICATION.name(), @@ -412,7 +413,7 @@ private static TimelineEntity convertDAGSubmittedEvent(DAGSubmittedEvent event) atsEntity.addPrimaryFilter(ATSConstants.USER, event.getUser()); atsEntity.addPrimaryFilter(ATSConstants.DAG_NAME, event.getDAGName()); atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getDagID().getApplicationId().toString()); + event.getApplicationId().toString()); if (event.getDAGPlan().hasCallerContext() && event.getDAGPlan().getCallerContext().hasCallerId()) { @@ -450,10 +451,10 @@ private static TimelineEntity convertDAGSubmittedEvent(DAGSubmittedEvent event) private static TimelineEntity convertDAGSubmittedToDAGExtraInfoEntity(DAGSubmittedEvent event) { TimelineEntity atsEntity = new TimelineEntity(); - atsEntity.setEntityId(event.getDagID().toString()); + atsEntity.setEntityId(event.getDAGID().toString()); atsEntity.setEntityType(EntityTypes.TEZ_DAG_EXTRA_INFO.name()); - atsEntity.addRelatedEntity(EntityTypes.TEZ_DAG_ID.name(), event.getDagID().toString()); + atsEntity.addRelatedEntity(EntityTypes.TEZ_DAG_ID.name(), event.getDAGID().toString()); TimelineEvent submitEvt = new TimelineEvent(); submitEvt.setEventType(HistoryEventType.DAG_SUBMITTED.name()); @@ -477,13 +478,13 @@ private static TimelineEntity convertTaskAttemptFinishedEvent(TaskAttemptFinishe atsEntity.setEntityType(EntityTypes.TEZ_TASK_ATTEMPT_ID.name()); atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getApplicationId().toString()); + event.getApplicationId().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_DAG_ID.name(), - event.getTaskAttemptID().getTaskID().getVertexID().getDAGId().toString()); + event.getDAGID().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_VERTEX_ID.name(), - event.getTaskAttemptID().getTaskID().getVertexID().toString()); + event.getVertexID().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_TASK_ID.name(), - event.getTaskAttemptID().getTaskID().toString()); + event.getTaskID().toString()); TimelineEvent finishEvt = new TimelineEvent(); finishEvt.setEventType(HistoryEventType.TASK_ATTEMPT_FINISHED.name()); @@ -542,16 +543,16 @@ private static TimelineEntity convertTaskAttemptStartedEvent(TaskAttemptStartedE atsEntity.setStartTime(event.getStartTime()); atsEntity.addRelatedEntity(EntityTypes.TEZ_TASK_ID.name(), - event.getTaskAttemptID().getTaskID().toString()); + event.getTaskID().toString()); 
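// History events now expose getApplicationId()/getDAGID()/getVertexID()/getTaskID() helpers directly, replacing the long getTaskAttemptID().getTaskID().getVertexID().getDAGId() accessor chains removed in these hunks.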
atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getTaskAttemptID().getTaskID().getVertexID().getDAGId().getApplicationId().toString()); + event.getApplicationId().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_DAG_ID.name(), - event.getTaskAttemptID().getTaskID().getVertexID().getDAGId().toString()); + event.getDAGID().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_VERTEX_ID.name(), - event.getTaskAttemptID().getTaskID().getVertexID().toString()); + event.getVertexID().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_TASK_ID.name(), - event.getTaskAttemptID().getTaskID().toString()); + event.getTaskID().toString()); TimelineEvent startEvt = new TimelineEvent(); startEvt.setEventType(HistoryEventType.TASK_ATTEMPT_STARTED.name()); @@ -579,11 +580,11 @@ private static TimelineEntity convertTaskFinishedEvent(TaskFinishedEvent event) atsEntity.setEntityType(EntityTypes.TEZ_TASK_ID.name()); atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getTaskID().getVertexID().getDAGId().getApplicationId().toString()); + event.getApplicationId().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_DAG_ID.name(), - event.getTaskID().getVertexID().getDAGId().toString()); + event.getDAGID().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_VERTEX_ID.name(), - event.getTaskID().getVertexID().toString()); + event.getVertexID().toString()); TimelineEvent finishEvt = new TimelineEvent(); finishEvt.setEventType(HistoryEventType.TASK_FINISHED.name()); @@ -614,14 +615,14 @@ private static TimelineEntity convertTaskStartedEvent(TaskStartedEvent event) { atsEntity.setEntityType(EntityTypes.TEZ_TASK_ID.name()); atsEntity.addRelatedEntity(EntityTypes.TEZ_VERTEX_ID.name(), - event.getTaskID().getVertexID().toString()); + event.getVertexID().toString()); atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getTaskID().getVertexID().getDAGId().getApplicationId().toString()); + event.getApplicationId().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_DAG_ID.name(), - event.getTaskID().getVertexID().getDAGId().toString()); + event.getDAGID().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_VERTEX_ID.name(), - event.getTaskID().getVertexID().toString()); + event.getVertexID().toString()); TimelineEvent startEvt = new TimelineEvent(); startEvt.setEventType(HistoryEventType.TASK_STARTED.name()); @@ -632,7 +633,7 @@ private static TimelineEntity convertTaskStartedEvent(TaskStartedEvent event) { atsEntity.addOtherInfo(ATSConstants.START_TIME, event.getStartTime()); atsEntity.addOtherInfo(ATSConstants.SCHEDULED_TIME, event.getScheduledTime()); - atsEntity.addOtherInfo(ATSConstants.STATUS, TaskState.SCHEDULED.name()); + atsEntity.addOtherInfo(ATSConstants.STATUS, event.getState().name()); return atsEntity; } @@ -643,9 +644,9 @@ private static TimelineEntity convertVertexFinishedEvent(VertexFinishedEvent eve atsEntity.setEntityType(EntityTypes.TEZ_VERTEX_ID.name()); atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getVertexID().getDAGId().getApplicationId().toString()); + event.getApplicationId().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_DAG_ID.name(), - event.getVertexID().getDAGId().toString()); + event.getDAGID().toString()); TimelineEvent finishEvt = new TimelineEvent(); finishEvt.setEventType(HistoryEventType.VERTEX_FINISHED.name()); @@ -685,12 +686,12 @@ private static TimelineEntity convertVertexInitializedEvent(VertexInitializedEve atsEntity.setEntityType(EntityTypes.TEZ_VERTEX_ID.name()); 
atsEntity.addRelatedEntity(EntityTypes.TEZ_DAG_ID.name(), - event.getVertexID().getDAGId().toString()); + event.getDAGID().toString()); atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getVertexID().getDAGId().getApplicationId().toString()); + event.getApplicationId().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_DAG_ID.name(), - event.getVertexID().getDAGId().toString()); + event.getDAGID().toString()); TimelineEvent initEvt = new TimelineEvent(); initEvt.setEventType(HistoryEventType.VERTEX_INITIALIZED.name()); @@ -718,9 +719,9 @@ private static TimelineEntity convertVertexStartedEvent(VertexStartedEvent event atsEntity.setEntityType(EntityTypes.TEZ_VERTEX_ID.name()); atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getVertexID().getDAGId().getApplicationId().toString()); + event.getApplicationId().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_DAG_ID.name(), - event.getVertexID().getDAGId().toString()); + event.getDAGID().toString()); TimelineEvent startEvt = new TimelineEvent(); startEvt.setEventType(HistoryEventType.VERTEX_STARTED.name()); @@ -741,17 +742,17 @@ private static TimelineEntity convertVertexReconfigureDoneEvent( atsEntity.setEntityType(EntityTypes.TEZ_VERTEX_ID.name()); atsEntity.addPrimaryFilter(ATSConstants.APPLICATION_ID, - event.getVertexID().getDAGId().getApplicationId().toString()); + event.getApplicationId().toString()); atsEntity.addPrimaryFilter(EntityTypes.TEZ_DAG_ID.name(), - event.getVertexID().getDAGId().toString()); + event.getDAGID().toString()); TimelineEvent updateEvt = new TimelineEvent(); updateEvt.setEventType(HistoryEventType.VERTEX_CONFIGURE_DONE.name()); updateEvt.setTimestamp(event.getReconfigureDoneTime()); - Map eventInfo = new HashMap(); + Map eventInfo = new HashMap<>(); if (event.getSourceEdgeProperties() != null && !event.getSourceEdgeProperties().isEmpty()) { - Map updatedEdgeManagers = new HashMap(); + Map updatedEdgeManagers = new HashMap<>(); for (Entry entry : event.getSourceEdgeProperties().entrySet()) { updatedEdgeManagers.put(entry.getKey(), diff --git a/tez-plugins/tez-yarn-timeline-history/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-plugins/tez-yarn-timeline-history/src/main/javadoc/resources/META-INF/LICENSE similarity index 100% rename from tez-plugins/tez-yarn-timeline-history/src/main/javadoc/resources/META-INF/LICENSE.txt rename to tez-plugins/tez-yarn-timeline-history/src/main/javadoc/resources/META-INF/LICENSE diff --git a/tez-plugins/tez-yarn-timeline-history/src/main/javadoc/resources/META-INF/NOTICE b/tez-plugins/tez-yarn-timeline-history/src/main/javadoc/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-plugins/tez-yarn-timeline-history/src/main/javadoc/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-plugins/tez-yarn-timeline-history/src/main/javadoc/resources/META-INF/NOTICE.txt b/tez-plugins/tez-yarn-timeline-history/src/main/javadoc/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-plugins/tez-yarn-timeline-history/src/main/javadoc/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
- diff --git a/tez-plugins/tez-yarn-timeline-history/src/main/resources/META-INF/LICENSE.txt b/tez-plugins/tez-yarn-timeline-history/src/main/resources/META-INF/LICENSE similarity index 100% rename from tez-plugins/tez-yarn-timeline-history/src/main/resources/META-INF/LICENSE.txt rename to tez-plugins/tez-yarn-timeline-history/src/main/resources/META-INF/LICENSE diff --git a/tez-plugins/tez-yarn-timeline-history/src/main/resources/META-INF/NOTICE b/tez-plugins/tez-yarn-timeline-history/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-plugins/tez-yarn-timeline-history/src/main/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-plugins/tez-yarn-timeline-history/src/main/resources/META-INF/NOTICE.txt b/tez-plugins/tez-yarn-timeline-history/src/main/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-plugins/tez-yarn-timeline-history/src/main/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/dag/history/logging/ats/TestATSHistoryLoggingService.java b/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/dag/history/logging/ats/TestATSHistoryLoggingService.java index 6603f4f668..0536568ab4 100644 --- a/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/dag/history/logging/ats/TestATSHistoryLoggingService.java +++ b/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/dag/history/logging/ats/TestATSHistoryLoggingService.java @@ -18,6 +18,8 @@ package org.apache.tez.dag.history.logging.ats; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -25,11 +27,9 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity; import org.apache.hadoop.yarn.client.api.TimelineClient; import org.apache.hadoop.yarn.util.SystemClock; import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.tez.common.security.DAGAccessControls; import org.apache.tez.common.security.HistoryACLPolicyManager; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.records.DAGProtos.DAGPlan; @@ -49,13 +49,9 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.mockito.Matchers; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.eq; import static org.mockito.Mockito.any; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -102,7 +98,7 @@ public void setup() throws Exception { when(appContext.getCurrentDAGID()).thenReturn(null); when(appContext.getApplicationID()).thenReturn(appId); when(atsHistoryLoggingService.timelineClient.putEntities( - 
Matchers.anyVararg())).thenAnswer( + any())).thenAnswer( new Answer() { @Override public Object answer(InvocationOnMock invocation) throws Throwable { @@ -184,8 +180,7 @@ public void testTimelineServiceDisable() throws Exception { atsHistoryLoggingService1.setAppContext(appContext); atsHistoryLoggingService1.timelineClient = mock(TimelineClient.class); - when(atsHistoryLoggingService1.timelineClient.putEntities( - Matchers.anyVararg())).thenAnswer( + when(atsHistoryLoggingService1.timelineClient.putEntities(any())).thenAnswer( new Answer() { @Override public Object answer(InvocationOnMock invocation) throws Throwable { @@ -227,12 +222,11 @@ public Object answer(InvocationOnMock invocation) throws Throwable { @Test(timeout=10000) public void testNonSessionDomains() throws Exception { - when(historyACLPolicyManager.setupSessionACLs((Configuration)any(), (ApplicationId)any())) - .thenReturn( - Collections.singletonMap(TezConfiguration.YARN_ATS_ACL_SESSION_DOMAIN_ID, "session-id")); + when(historyACLPolicyManager.setupSessionACLs(any(), any())) + .thenReturn( + Collections.singletonMap(TezConfiguration.YARN_ATS_ACL_SESSION_DOMAIN_ID, "session-id")); atsHistoryLoggingService.start(); - verify(historyACLPolicyManager, times(1)).setupSessionACLs( - (Configuration)any(), (ApplicationId)any()); + verify(historyACLPolicyManager, times(1)).setupSessionACLs(any(), any()); // Send the event and wait for completion. TezDAGID dagId1 = TezDAGID.getInstance(appId, 0); @@ -245,7 +239,7 @@ public void testNonSessionDomains() throws Exception { } // No dag domain were created. verify(historyACLPolicyManager, times(0)) - .setupSessionDAGACLs((Configuration)any(), eq(appId), eq("0"), (DAGAccessControls)any()); + .setupSessionDAGACLs(any(), eq(appId), eq("0"), any()); // All calls made with session domain id. verify(historyACLPolicyManager, times(6)).updateTimelineEntityDomain(any(), eq("session-id")); @@ -253,11 +247,10 @@ public void testNonSessionDomains() throws Exception { @Test(timeout=10000) public void testNonSessionDomainsFailed() throws Exception { - when(historyACLPolicyManager.setupSessionACLs((Configuration)any(), (ApplicationId)any())) - .thenThrow(new IOException()); + when(historyACLPolicyManager.setupSessionACLs(any(), any())) + .thenThrow(new IOException()); atsHistoryLoggingService.start(); - verify(historyACLPolicyManager, times(1)).setupSessionACLs( - (Configuration)any(), (ApplicationId)any()); + verify(historyACLPolicyManager, times(1)).setupSessionACLs(any(), any()); // Send the event and wait for completion. TezDAGID dagId1 = TezDAGID.getInstance(appId, 0); @@ -269,7 +262,7 @@ public void testNonSessionDomainsFailed() throws Exception { } // No dag domain were created. verify(historyACLPolicyManager, times(0)) - .setupSessionDAGACLs((Configuration)any(), eq(appId), eq("0"), (DAGAccessControls)any()); + .setupSessionDAGACLs(any(), eq(appId), eq("0"), any()); // All calls made with session domain id. 
verify(historyACLPolicyManager, times(0)).updateTimelineEntityDomain(any(), eq("session-id")); @@ -278,11 +271,10 @@ public void testNonSessionDomainsFailed() throws Exception { @Test(timeout=10000) public void testNonSessionDomainsAclNull() throws Exception { - when(historyACLPolicyManager.setupSessionACLs((Configuration)any(), (ApplicationId)any())) - .thenReturn(null); + when(historyACLPolicyManager.setupSessionACLs(any(), any())) + .thenReturn(null); atsHistoryLoggingService.start(); - verify(historyACLPolicyManager, times(1)).setupSessionACLs( - (Configuration)any(), (ApplicationId)any()); + verify(historyACLPolicyManager, times(1)).setupSessionACLs(any(), any()); // Send the event and wait for completion. TezDAGID dagId1 = TezDAGID.getInstance(appId, 0); @@ -295,7 +287,7 @@ public void testNonSessionDomainsAclNull() throws Exception { } // No dag domain were created. verify(historyACLPolicyManager, times(0)) - .setupSessionDAGACLs((Configuration)any(), eq(appId), eq("0"), (DAGAccessControls)any()); + .setupSessionDAGACLs(any(), eq(appId), eq("0"), any()); // All calls made with session domain id. verify(historyACLPolicyManager, times(0)).updateTimelineEntityDomain(any(), eq("session-id")); @@ -304,19 +296,15 @@ public void testNonSessionDomainsAclNull() throws Exception { @Test(timeout=10000) public void testSessionDomains() throws Exception { - when(historyACLPolicyManager.setupSessionACLs((Configuration)any(), (ApplicationId)any())) - .thenReturn( - Collections.singletonMap(TezConfiguration.YARN_ATS_ACL_SESSION_DOMAIN_ID, "test-domain")); + when(historyACLPolicyManager.setupSessionACLs(any(), any())) + .thenReturn(Collections.singletonMap(TezConfiguration.YARN_ATS_ACL_SESSION_DOMAIN_ID, "test-domain")); - when(historyACLPolicyManager.setupSessionDAGACLs( - (Configuration)any(), (ApplicationId)any(), eq("0"), (DAGAccessControls)any())) - .thenReturn( - Collections.singletonMap(TezConfiguration.YARN_ATS_ACL_DAG_DOMAIN_ID, "dag-domain")); + when(historyACLPolicyManager.setupSessionDAGACLs(any(), any(), eq("0"), any())) + .thenReturn(Collections.singletonMap(TezConfiguration.YARN_ATS_ACL_DAG_DOMAIN_ID, "dag-domain")); when(appContext.isSession()).thenReturn(true); atsHistoryLoggingService.start(); - verify(historyACLPolicyManager, times(1)).setupSessionACLs((Configuration)any(), - (ApplicationId)any()); + verify(historyACLPolicyManager, times(1)).setupSessionACLs(any(), any()); // Send the event and wait for completion. TezDAGID dagId1 = TezDAGID.getInstance(appId, 0); @@ -329,7 +317,7 @@ public void testSessionDomains() throws Exception { } // No dag domain were created. verify(historyACLPolicyManager, times(1)) - .setupSessionDAGACLs((Configuration)any(), eq(appId), eq("0"), (DAGAccessControls)any()); + .setupSessionDAGACLs(any(), eq(appId), eq("0"), any()); // All calls made with session domain id. 
verify(historyACLPolicyManager, times(1)).updateTimelineEntityDomain(any(), eq("test-domain")); @@ -338,18 +326,15 @@ public void testSessionDomains() throws Exception { @Test(timeout=10000) public void testSessionDomainsFailed() throws Exception { - when(historyACLPolicyManager.setupSessionACLs((Configuration)any(), (ApplicationId)any())) - .thenThrow(new IOException()); + when(historyACLPolicyManager.setupSessionACLs(any(), any())) + .thenThrow(new IOException()); - when(historyACLPolicyManager.setupSessionDAGACLs( - (Configuration)any(), (ApplicationId)any(), eq("0"), (DAGAccessControls)any())) - .thenReturn( - Collections.singletonMap(TezConfiguration.YARN_ATS_ACL_DAG_DOMAIN_ID, "dag-domain")); + when(historyACLPolicyManager.setupSessionDAGACLs(any(), any(), eq("0"), any())) + .thenReturn(Collections.singletonMap(TezConfiguration.YARN_ATS_ACL_DAG_DOMAIN_ID, "dag-domain")); when(appContext.isSession()).thenReturn(true); atsHistoryLoggingService.start(); - verify(historyACLPolicyManager, times(1)).setupSessionACLs((Configuration)any(), - (ApplicationId)any()); + verify(historyACLPolicyManager, times(1)).setupSessionACLs(any(), any()); // Send the event and wait for completion. TezDAGID dagId1 = TezDAGID.getInstance(appId, 0); @@ -361,27 +346,24 @@ public void testSessionDomainsFailed() throws Exception { } // No dag domain were created. verify(historyACLPolicyManager, times(0)) - .setupSessionDAGACLs((Configuration)any(), eq(appId), eq("0"), (DAGAccessControls)any()); + .setupSessionDAGACLs(any(), eq(appId), eq("0"), any()); // No calls were made for domains. - verify(historyACLPolicyManager, times(0)).updateTimelineEntityDomain(any(), (String)any()); + verify(historyACLPolicyManager, times(0)).updateTimelineEntityDomain(any(), any()); Assert.assertEquals(0, atsEntitiesCounter); } @Test(timeout=10000) public void testSessionDomainsDagFailed() throws Exception { - when(historyACLPolicyManager.setupSessionACLs((Configuration)any(), (ApplicationId)any())) - .thenReturn(Collections.singletonMap( - TezConfiguration.YARN_ATS_ACL_SESSION_DOMAIN_ID, "session-domain")); + when(historyACLPolicyManager.setupSessionACLs(any(), any())) + .thenReturn(Collections.singletonMap(TezConfiguration.YARN_ATS_ACL_SESSION_DOMAIN_ID, "session-domain")); - when(historyACLPolicyManager.setupSessionDAGACLs( - (Configuration)any(), (ApplicationId)any(), eq("0"), (DAGAccessControls)any())) - .thenThrow(new IOException()); + when(historyACLPolicyManager.setupSessionDAGACLs(any(), any(), eq("0"), any())) + .thenThrow(new IOException()); when(appContext.isSession()).thenReturn(true); atsHistoryLoggingService.start(); - verify(historyACLPolicyManager, times(1)).setupSessionACLs((Configuration)any(), - (ApplicationId)any()); + verify(historyACLPolicyManager, times(1)).setupSessionACLs(any(), any()); // Send the event and wait for completion. TezDAGID dagId1 = TezDAGID.getInstance(appId, 0); @@ -394,29 +376,27 @@ public void testSessionDomainsDagFailed() throws Exception { } // DAG domain was called once. verify(historyACLPolicyManager, times(1)) - .setupSessionDAGACLs((Configuration)any(), eq(appId), eq("0"), (DAGAccessControls)any()); + .setupSessionDAGACLs(any(), eq(appId), eq("0"), any()); // All calls made with session domain id. 
verify(historyACLPolicyManager, times(1)) .updateTimelineEntityDomain(any(), eq("session-domain")); verify(historyACLPolicyManager, times(1)) - .updateTimelineEntityDomain(any(), (String)any()); + .updateTimelineEntityDomain(any(), any()); Assert.assertEquals(1, atsEntitiesCounter); } @Test(timeout=10000) public void testSessionDomainsAclNull() throws Exception { - when(historyACLPolicyManager.setupSessionACLs((Configuration)any(), (ApplicationId)any())) - .thenReturn(null); + when(historyACLPolicyManager.setupSessionACLs(any(), any())) + .thenReturn(null); - when(historyACLPolicyManager.setupSessionDAGACLs( - (Configuration)any(), (ApplicationId)any(), eq("0"), (DAGAccessControls)any())) - .thenReturn(null); + when(historyACLPolicyManager.setupSessionDAGACLs(any(), any(), eq("0"), any())) + .thenReturn(null); when(appContext.isSession()).thenReturn(true); atsHistoryLoggingService.start(); - verify(historyACLPolicyManager, times(1)).setupSessionACLs((Configuration)any(), - (ApplicationId)any()); + verify(historyACLPolicyManager, times(1)).setupSessionACLs(any(), any()); // Send the event and wait for completion. TezDAGID dagId1 = TezDAGID.getInstance(appId, 0); @@ -429,10 +409,10 @@ public void testSessionDomainsAclNull() throws Exception { } // No dag domain were created. verify(historyACLPolicyManager, times(1)) - .setupSessionDAGACLs((Configuration)any(), eq(appId), eq("0"), (DAGAccessControls)any()); + .setupSessionDAGACLs(any(), eq(appId), eq("0"), any()); // All calls made with session domain id. - verify(historyACLPolicyManager, times(0)).updateTimelineEntityDomain(any(), (String)any()); + verify(historyACLPolicyManager, times(0)).updateTimelineEntityDomain(any(), any()); Assert.assertEquals(6, atsEntitiesCounter); } diff --git a/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/tests/MiniTezClusterWithTimeline.java b/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/tests/MiniTezClusterWithTimeline.java index d13ebdbee2..f8a35a29c3 100644 --- a/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/tests/MiniTezClusterWithTimeline.java +++ b/tez-plugins/tez-yarn-timeline-history/src/test/java/org/apache/tez/tests/MiniTezClusterWithTimeline.java @@ -49,6 +49,7 @@ import org.apache.hadoop.yarn.server.MiniYARNCluster; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; +import org.apache.tez.common.TezTestUtils; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.app.DAGAppMaster; @@ -92,6 +93,12 @@ public MiniTezClusterWithTimeline(String testName, int noOfNMs, super(testName, 1, noOfNMs, numLocalDirs, numLogDirs, enableAHS); } + @Override + public void init(Configuration conf) { + TezTestUtils.ensureHighDiskUtilizationLimit(conf); + super.init(conf); + } + @Override public void serviceInit(Configuration conf) throws Exception { conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME); diff --git a/tez-plugins/tez-yarn-timeline-history/src/test/resources/META-INF/LICENSE.txt b/tez-plugins/tez-yarn-timeline-history/src/test/resources/META-INF/LICENSE similarity index 100% rename from tez-plugins/tez-yarn-timeline-history/src/test/resources/META-INF/LICENSE.txt rename to tez-plugins/tez-yarn-timeline-history/src/test/resources/META-INF/LICENSE diff --git a/tez-plugins/tez-yarn-timeline-history/src/test/resources/META-INF/NOTICE 
b/tez-plugins/tez-yarn-timeline-history/src/test/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-plugins/tez-yarn-timeline-history/src/test/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-plugins/tez-yarn-timeline-history/src/test/resources/META-INF/NOTICE.txt b/tez-plugins/tez-yarn-timeline-history/src/test/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-plugins/tez-yarn-timeline-history/src/test/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-runtime-internals/pom.xml b/tez-runtime-internals/pom.xml index 0cef94bb1c..e235e933cd 100644 --- a/tez-runtime-internals/pom.xml +++ b/tez-runtime-internals/pom.xml @@ -20,11 +20,15 @@ org.apache.tez tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-runtime-internals + + commons-lang + commons-lang + org.apache.tez tez-api @@ -45,6 +49,23 @@ org.apache.hadoop hadoop-common + + org.apache.hadoop + hadoop-common + test + test-jar + + + org.apache.hadoop + hadoop-hdfs + test + + + org.apache.hadoop + hadoop-hdfs + test + test-jar + org.apache.hadoop hadoop-yarn-api @@ -71,12 +92,12 @@ org.mockito - mockito-all + mockito-core test org.slf4j - slf4j-log4j12 + slf4j-reload4j @@ -87,28 +108,26 @@ apache-rat-plugin - org.apache.hadoop - hadoop-maven-plugins + com.github.os72 + protoc-jar-maven-plugin - compile-protoc generate-sources - protoc + run - ${protobuf.version} + com.google.protobuf:protoc:${protobuf.version} ${protoc.path} - - ${basedir}/src/main/proto - - - ${basedir}/src/main/proto - - RuntimeEvents.proto - - - ${project.build.directory}/generated-sources/java + none + + ${basedir}/src/main/proto + + + + ${project.build.directory}/generated-sources/java + + diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/common/ProtoConverters.java b/tez-runtime-internals/src/main/java/org/apache/tez/common/ProtoConverters.java index ea90158d52..bf68944143 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/common/ProtoConverters.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/common/ProtoConverters.java @@ -18,9 +18,13 @@ package org.apache.tez.common; +import com.google.common.base.Charsets; import com.google.protobuf.ByteString; +import java.nio.ByteBuffer; + import org.apache.tez.runtime.api.events.CompositeDataMovementEvent; +import org.apache.tez.runtime.api.events.CustomProcessorEvent; import org.apache.tez.runtime.api.events.DataMovementEvent; import org.apache.tez.runtime.api.events.CompositeRoutedDataMovementEvent; import org.apache.tez.runtime.api.events.EventProtos; @@ -29,7 +33,24 @@ import org.apache.tez.runtime.api.events.VertexManagerEvent; import org.apache.tez.runtime.api.events.EventProtos.VertexManagerEventProto; -public class ProtoConverters { +public final class ProtoConverters { + + public static EventProtos.CustomProcessorEventProto convertCustomProcessorEventToProto( + CustomProcessorEvent event) { + EventProtos.CustomProcessorEventProto.Builder builder = + EventProtos.CustomProcessorEventProto.newBuilder(); + if (event.getPayload() != null) { + builder.setUserPayload(ByteString.copyFrom(event.getPayload())); + } + 
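// The field is left unset when the payload is null so that hasUserPayload()
+ // stays meaningful on the wire; protobuf bytes getters never return null, so
+ // readers use the has-check rather than a null-check to detect absence.
+ 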
builder.setVersion(event.getVersion()); + return builder.build(); + } + + public static CustomProcessorEvent convertCustomProcessorEventFromProto( + EventProtos.CustomProcessorEventProto proto) { + return CustomProcessorEvent.create(proto.hasUserPayload() ? + proto.getUserPayload().asReadOnlyByteBuffer() : null, proto.getVersion()); + } public static EventProtos.DataMovementEventProto convertDataMovementEventToProto( DataMovementEvent event) { @@ -117,15 +138,22 @@ public static VertexManagerEvent convertVertexManagerEventFromProto( if (event.getUserPayload() != null) { builder.setUserPayload(ByteString.copyFrom(event.getUserPayload())); } + if (event.getSerializedPath() != null) { + builder.setSerializedPath(ByteString.copyFrom(event.getSerializedPath().getBytes(Charsets.UTF_8))); + } return builder.build(); } - public static InputDataInformationEvent - convertRootInputDataInformationEventFromProto( + public static InputDataInformationEvent convertRootInputDataInformationEventFromProto( EventProtos.RootInputDataInformationEventProto proto) { + ByteBuffer payload = proto.hasUserPayload() ? proto.getUserPayload().asReadOnlyByteBuffer() : null; - InputDataInformationEvent diEvent = InputDataInformationEvent.createWithSerializedPayload( - proto.getSourceIndex(), - proto.hasUserPayload() ? proto.getUserPayload().asReadOnlyByteBuffer() : null); + InputDataInformationEvent diEvent = null; + if (proto.hasSerializedPath()) { + diEvent = InputDataInformationEvent.createWithSerializedPath(proto.getSourceIndex(), + proto.getSerializedPath().toStringUtf8()); + } else { + diEvent = InputDataInformationEvent.createWithSerializedPayload(proto.getSourceIndex(), payload); + } diEvent.setTargetIndex(proto.getTargetIndex()); return diEvent; } @@ -150,4 +178,5 @@ public static InputInitializerEvent convertRootInputInitializerEventFromProto( return event; } + private ProtoConverters() {} } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/common/TezConverterUtils.java b/tez-runtime-internals/src/main/java/org/apache/tez/common/TezConverterUtils.java index 02dc69c3e5..933115757d 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/common/TezConverterUtils.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/common/TezConverterUtils.java @@ -28,7 +28,9 @@ import org.apache.tez.runtime.api.TaskFailureType; import org.apache.tez.runtime.internals.api.events.SystemEventProtos.TaskFailureTypeProto; -public class TezConverterUtils { +public final class TezConverterUtils { + + private TezConverterUtils() {} /** * return a {@link URI} from a given url * @param url * url to convert * @return path from {@link URL} - * @throws URISyntaxException */ @Private public static URI getURIFromYarnURL(URL url) throws URISyntaxException { diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/InputReadyTracker.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/InputReadyTracker.java index ba4fe1d170..b2b2b58e15 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/InputReadyTracker.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/InputReadyTracker.java @@ -33,7 +33,7 @@ import org.apache.tez.runtime.api.Input; import org.apache.tez.runtime.api.MergedLogicalInput; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; diff --git 
a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java index 5c2ab77e2a..ac67789e4e 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/LogicalIOProcessorRuntimeTask.java @@ -43,6 +43,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.tez.hadoop.shim.HadoopShim; @@ -57,6 +58,7 @@ import org.apache.tez.common.ReflectionUtils; import org.apache.tez.common.RunnableWithNdc; import org.apache.tez.common.TezExecutors; +import org.apache.tez.common.counters.TaskCounter; import org.apache.tez.dag.api.InputDescriptor; import org.apache.tez.dag.api.OutputDescriptor; import org.apache.tez.dag.api.ProcessorDescriptor; @@ -96,7 +98,7 @@ import org.apache.tez.runtime.common.resources.MemoryDistributor; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; @@ -160,6 +162,8 @@ public class LogicalIOProcessorRuntimeTask extends RuntimeTask { private final boolean initializeProcessorFirst; private final boolean initializeProcessorIOSerially; private final TezExecutors sharedExecutor; + /** nanoTime of the task initialization start. */ + private Long initStartTimeNs = null; public LogicalIOProcessorRuntimeTask(TaskSpec taskSpec, int appAttemptNumber, Configuration tezConf, String[] localDirs, TezUmbilical tezUmbilical, @@ -229,6 +233,9 @@ public LogicalIOProcessorRuntimeTask(TaskSpec taskSpec, int appAttemptNumber, public void initialize() throws Exception { Preconditions.checkState(this.state.get() == State.NEW, "Already initialized"); this.state.set(State.INITED); + if (this.tezCounters != null) { + this.initStartTimeNs = System.nanoTime(); + } this.processorContext = createProcessorContext(); this.processor = createProcessor(processorDescriptor.getClassName(), processorContext); @@ -380,30 +387,43 @@ public void close() throws Exception { "Can only run while in RUNNING state. Current: " + this.state); this.state.set(State.CLOSED); + + List<List<Event>> allCloseInputEvents = Lists.newArrayList(); // Close the Inputs. for (InputSpec inputSpec : inputSpecs) { String srcVertexName = inputSpec.getSourceVertexName(); initializedInputs.remove(srcVertexName); List closeInputEvents = ((InputFrameworkInterface)inputsMap.get(srcVertexName)).close(); - sendTaskGeneratedEvents(closeInputEvents, - EventProducerConsumerType.INPUT, taskSpec.getVertexName(), - srcVertexName, taskSpec.getTaskAttemptID()); + allCloseInputEvents.add(closeInputEvents); } + List<List<Event>> allCloseOutputEvents = Lists.newArrayList(); // Close the Outputs.
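// (Mirrors the inputs above: close events are buffered here and forwarded to
// the AM only after processor.close() succeeds, so a failure in the
// processor's close path no longer publishes close events for this attempt.)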
for (OutputSpec outputSpec : outputSpecs) { String destVertexName = outputSpec.getDestinationVertexName(); initializedOutputs.remove(destVertexName); List closeOutputEvents = ((LogicalOutputFrameworkInterface)outputsMap.get(destVertexName)).close(); - sendTaskGeneratedEvents(closeOutputEvents, - EventProducerConsumerType.OUTPUT, taskSpec.getVertexName(), - destVertexName, taskSpec.getTaskAttemptID()); + allCloseOutputEvents.add(closeOutputEvents); } // Close the Processor. processorClosed = true; processor.close(); + for (int i = 0; i < allCloseInputEvents.size(); i++) { + String srcVertexName = inputSpecs.get(i).getSourceVertexName(); + sendTaskGeneratedEvents(allCloseInputEvents.get(i), + EventProducerConsumerType.INPUT, taskSpec.getVertexName(), + srcVertexName, taskSpec.getTaskAttemptID()); + } + + for (int i = 0; i < allCloseOutputEvents.size(); i++) { + String destVertexName = outputSpecs.get(i).getDestinationVertexName(); + sendTaskGeneratedEvents(allCloseOutputEvents.get(i), + EventProducerConsumerType.OUTPUT, taskSpec.getVertexName(), + destVertexName, taskSpec.getTaskAttemptID()); + } + } finally { setTaskDone(); // Clear the interrupt status since the task execution is done. @@ -447,9 +467,7 @@ protected Void callInternal() throws Exception { } protected Void _callInternal() throws Exception { - if (LOG.isDebugEnabled()) { - LOG.debug("Initializing Input using InputSpec: " + inputSpec); - } + LOG.debug("Initializing Input using InputSpec: {}", inputSpec); String edgeName = inputSpec.getSourceVertexName(); InputContext inputContext = createInputContext(inputsMap, inputSpec, inputIndex); LogicalInput input = createInput(inputSpec, inputContext); @@ -463,9 +481,7 @@ protected Void _callInternal() throws Exception { inputContext.getTaskVertexName(), inputContext.getSourceVertexName(), taskSpec.getTaskAttemptID()); initializedInputs.put(edgeName, input); - if (LOG.isDebugEnabled()) { - LOG.debug("Initialized Input with src edge: " + edgeName); - } + LOG.debug("Initialized Input with src edge: {}", edgeName); initializedInputs.put(edgeName, input); return null; } @@ -492,9 +508,7 @@ protected Void callInternal() throws Exception { } protected Void _callInternal() throws Exception { - if (LOG.isDebugEnabled()) { - LOG.debug("Starting Input with src edge: " + srcVertexName); - } + LOG.debug("Starting Input with src edge: {}", srcVertexName); input.start(); LOG.info("Started Input with src edge: " + srcVertexName); @@ -524,9 +538,7 @@ protected Void callInternal() throws Exception { } protected Void _callInternal() throws Exception { - if (LOG.isDebugEnabled()) { - LOG.debug("Initializing Output using OutputSpec: " + outputSpec); - } + LOG.debug("Initializing Output using OutputSpec: {}", outputSpec); String edgeName = outputSpec.getDestinationVertexName(); OutputContext outputContext = createOutputContext(outputSpec, outputIndex); LogicalOutput output = createOutput(outputSpec, outputContext); @@ -539,9 +551,7 @@ protected Void _callInternal() throws Exception { outputContext.getTaskVertexName(), outputContext.getDestinationVertexName(), taskSpec.getTaskAttemptID()); initializedOutputs.put(edgeName, output); - if (LOG.isDebugEnabled()) { - LOG.debug("Initialized Output with dest edge: " + edgeName); - } + LOG.debug("Initialized Output with dest edge: {}", edgeName); initializedOutputs.put(edgeName, output); return null; } @@ -559,9 +569,7 @@ private void initializeGroupInputs() throws TezException { if (groupInputSpecs != null && !groupInputSpecs.isEmpty()) { groupInputsMap = new 
ConcurrentHashMap(groupInputSpecs.size()); for (GroupInputSpec groupInputSpec : groupInputSpecs) { - if (LOG.isDebugEnabled()) { - LOG.debug("Initializing GroupInput using GroupInputSpec: " + groupInputSpec); - } + LOG.debug("Initializing GroupInput using GroupInputSpec: {}", groupInputSpec); MergedInputContext mergedInputContext = new TezMergedInputContextImpl(groupInputSpec.getMergedInputDescriptor().getUserPayload(), groupInputSpec.getGroupName(), groupInputsMap, inputReadyTracker, localDirs, this); @@ -945,8 +953,7 @@ public void cleanup() throws InterruptedException { LOG.info("Resetting interrupt for processor"); Thread.currentThread().interrupt(); } catch (Throwable e) { - LOG.warn( - "Ignoring Exception when closing processor(cleanup). Exception class={}, message={}" + + LOG.warn("Ignoring Exception when closing processor(cleanup). Exception class={}, message={}", e.getClass().getName(), e.getMessage()); } } @@ -1064,4 +1071,15 @@ public HadoopShim getHadoopShim() { public Configuration getTaskConf() { return tezConf; } + + @Override + public void setFrameworkCounters() { + super.setFrameworkCounters(); + if (tezCounters != null && isUpdatingSystemCounters()) { + long timeNs = initStartTimeNs == null ? 0 + : (System.nanoTime() - initStartTimeNs); + tezCounters.findCounter(TaskCounter.WALL_CLOCK_MILLISECONDS) + .setValue(TimeUnit.NANOSECONDS.toMillis(timeNs)); + } + } } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java index 7b86d4bc02..4c44985eed 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/RuntimeTask.java @@ -18,6 +18,7 @@ package org.apache.tez.runtime; +import java.io.IOException; import java.util.Collection; import java.util.EnumSet; import java.util.Map; @@ -26,6 +27,8 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.runtime.api.impl.TaskSpec; @@ -35,6 +38,11 @@ import org.apache.tez.runtime.metrics.TaskCounterUpdater; import com.google.common.collect.Maps; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.tez.dag.api.TezConfiguration.TEZ_TASK_LOCAL_FS_WRITE_LIMIT_BYTES; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_TASK_LOCAL_FS_WRITE_LIMIT_BYTES_DEFAULT; public abstract class RuntimeTask { @@ -54,6 +62,9 @@ public abstract class RuntimeTask { private final TaskStatistics statistics; private final AtomicBoolean progressNotified = new AtomicBoolean(false); + private final long lfsBytesWriteLimit; + private static final Logger LOG = LoggerFactory.getLogger(RuntimeTask.class); + protected RuntimeTask(TaskSpec taskSpec, Configuration tezConf, TezUmbilical tezUmbilical, String pid, boolean setupSysCounterUpdater) { this.taskSpec = taskSpec; @@ -71,6 +82,8 @@ protected RuntimeTask(TaskSpec taskSpec, Configuration tezConf, } else { this.counterUpdater = null; } + this.lfsBytesWriteLimit = + tezConf.getLong(TEZ_TASK_LOCAL_FS_WRITE_LIMIT_BYTES, TEZ_TASK_LOCAL_FS_WRITE_LIMIT_BYTES_DEFAULT); } protected enum State { @@ -178,4 +191,40 @@ protected void setTaskDone() { } public abstract void abortTask(); + + protected final boolean isUpdatingSystemCounters() 
{ + return counterUpdater != null; + } + + /** + * Check whether the task has exceeded any configured limits. + * + * @throws LocalWriteLimitException in case the limit is exceeded. + */ + public void checkTaskLimits() throws LocalWriteLimitException { + // check the limit for writing to local file system + if (lfsBytesWriteLimit >= 0) { + Long lfsBytesWritten = null; + try { + LocalFileSystem localFS = FileSystem.getLocal(tezConf); + lfsBytesWritten = FileSystem.getGlobalStorageStatistics().get(localFS.getScheme()).getLong("bytesWritten"); + } catch (IOException e) { + LOG.warn("Could not get LocalFileSystem bytesWritten counter", e); + } + if (lfsBytesWritten != null && lfsBytesWritten > lfsBytesWriteLimit) { + throw new LocalWriteLimitException( + "Too many bytes written to the local file system: current value is " + lfsBytesWritten + + ", the limit is " + lfsBytesWriteLimit); + } + } + } + + /** + * Exception thrown when the task exceeds some configured limits. + */ + public static class LocalWriteLimitException extends IOException { + public LocalWriteLimitException(String str) { + super(str); + } + } } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/TezThreadDumpHelper.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/TezThreadDumpHelper.java new file mode 100644 index 0000000000..022186a4b8 --- /dev/null +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/TezThreadDumpHelper.java @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tez.runtime; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.Appender; +import org.apache.tez.common.Preconditions; +import org.apache.tez.common.TezContainerLogAppender; +import org.apache.tez.dag.api.TezConstants; +import org.apache.tez.dag.api.TezUncheckedException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.PrintStream; +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadInfo; +import java.lang.management.ThreadMXBean; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import static org.apache.hadoop.yarn.conf.YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR; +import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_REMOTE_APP_LOG_DIR; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_THREAD_DUMP_INTERVAL; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_THREAD_DUMP_INTERVAL_DEFAULT; + +public class TezThreadDumpHelper { + + private final long duration; + private final Path basePath; + private final FileSystem fs; + + private static final ThreadMXBean THREAD_BEAN = ManagementFactory.getThreadMXBean(); + private static final Logger LOG = LoggerFactory.getLogger(TezThreadDumpHelper.class); + + private ScheduledExecutorService periodicThreadDumpServiceExecutor; + + private TezThreadDumpHelper(long duration, Configuration conf) throws IOException { + this.duration = duration; + Appender appender = org.apache.log4j.Logger.getRootLogger().getAppender(TezConstants.TEZ_CONTAINER_LOGGER_NAME); + if (appender instanceof TezContainerLogAppender) { + this.basePath = new Path(((TezContainerLogAppender) appender).getContainerLogDir()); + this.fs = FileSystem.getLocal(conf); + } else { + // Fallback, if it is any other appender or if none is configured. 
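+ // Thread dumps are then written under the YARN remote app-log dir
+ // (yarn.nodemanager.remote-app-log-dir) instead of the container's local
+ // log directory.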
+ this.basePath = new Path(conf.get(NM_REMOTE_APP_LOG_DIR, DEFAULT_NM_REMOTE_APP_LOG_DIR)); + this.fs = this.basePath.getFileSystem(conf); + } + LOG.info("Periodic Thread Dump Capture Service Configured to capture Thread Dumps at {} ms frequency and at " + + "path: {}", duration, basePath); + } + + public static TezThreadDumpHelper getInstance(Configuration conf) { + long periodicThreadDumpFrequency = conf.getTimeDuration(TEZ_THREAD_DUMP_INTERVAL, + TEZ_THREAD_DUMP_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + Preconditions.checkArgument(periodicThreadDumpFrequency > 0, "%s must be positive duration", + TEZ_THREAD_DUMP_INTERVAL); + + try { + return new TezThreadDumpHelper(periodicThreadDumpFrequency, conf); + } catch (IOException e) { + throw new TezUncheckedException("Can not initialize periodic thread dump service", e); + } + } + + public TezThreadDumpHelper start(String name) { + periodicThreadDumpServiceExecutor = Executors.newScheduledThreadPool(1, + new ThreadFactoryBuilder().setDaemon(true).setNameFormat("PeriodicThreadDumpService{" + name + "} #%d") + .build()); + Runnable threadDumpCollector = new ThreadDumpCollector(basePath, name, fs); + // Re-run at a fixed cadence so dumps are captured periodically, not just once. + periodicThreadDumpServiceExecutor.scheduleAtFixedRate(threadDumpCollector, duration, duration, + TimeUnit.MILLISECONDS); + return this; + } + + public void stop() { + if (periodicThreadDumpServiceExecutor != null) { + periodicThreadDumpServiceExecutor.shutdown(); + + try { + if (!periodicThreadDumpServiceExecutor.awaitTermination(100, TimeUnit.MILLISECONDS)) { + periodicThreadDumpServiceExecutor.shutdownNow(); + } + } catch (InterruptedException ignored) { + // Ignore interrupt, will attempt a final shutdown below. + } + periodicThreadDumpServiceExecutor.shutdownNow(); + periodicThreadDumpServiceExecutor = null; + } + } + + private static class ThreadDumpCollector implements Runnable { + + private final Path path; + private final String name; + private final FileSystem fs; + + ThreadDumpCollector(Path path, String name, FileSystem fs) { + this.path = path; + this.fs = fs; + this.name = name; + } + + @Override + public void run() { + if (!Thread.interrupted()) { + try (FSDataOutputStream fsStream = fs.create( + new Path(path, name + "_" + System.currentTimeMillis() + ".jstack")); + PrintStream printStream = new PrintStream(fsStream, false, "UTF8")) { + printThreadInfo(printStream, name); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + public synchronized void printThreadInfo(PrintStream stream, String title) { + boolean contention = THREAD_BEAN.isThreadContentionMonitoringEnabled(); + long[] threadIds = THREAD_BEAN.getAllThreadIds(); + stream.println("Process Thread Dump: " + title); + stream.println(threadIds.length + " active threads"); + for (long tid : threadIds) { + ThreadInfo info = THREAD_BEAN.getThreadInfo(tid, Integer.MAX_VALUE); + if (info == null) { + stream.println(" Inactive"); + continue; + } + stream.println("Thread " + getTaskName(info.getThreadId(), info.getThreadName()) + ":"); + Thread.State state = info.getThreadState(); + stream.println(" State: " + state); + stream.println(" Blocked count: " + info.getBlockedCount()); + stream.println(" Waited count: " + info.getWaitedCount()); + if (contention) { + stream.println(" Blocked time: " + info.getBlockedTime()); + stream.println(" Waited time: " + info.getWaitedTime()); + } + if (state == Thread.State.WAITING) { + stream.println(" Waiting on " + info.getLockName()); + } else if (state == Thread.State.BLOCKED) { + stream.println(" Blocked on " + info.getLockName()); + 
stream.println(" Blocked by " + getTaskName(info.getLockOwnerId(), info.getLockOwnerName())); + } + stream.println(" Stack:"); + for (StackTraceElement frame : info.getStackTrace()) { + stream.println(" " + frame.toString()); + } + } + stream.flush(); + } + + private String getTaskName(long id, String taskName) { + if (taskName == null) { + return Long.toString(id); + } + return id + " (" + taskName + ")"; + } + } +} diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/EventMetaData.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/EventMetaData.java index 0ee96af27d..abe8c9dd53 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/EventMetaData.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/EventMetaData.java @@ -18,23 +18,25 @@ package org.apache.tez.runtime.api.impl; -import static com.google.common.base.Preconditions.checkNotNull; + import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Objects; import javax.annotation.Nullable; import org.apache.hadoop.io.Writable; -import org.apache.hadoop.util.StringInterner; +import org.apache.tez.dag.records.TaskAttemptIDAware; import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.util.StringInterner; /** * Class that encapsulates all the information to identify the unique * object that either generated an Event or is the recipient of an Event. */ -public class EventMetaData implements Writable { +public class EventMetaData implements Writable, TaskAttemptIDAware { public static enum EventProducerConsumerType { INPUT, @@ -70,11 +72,11 @@ public EventMetaData() { public EventMetaData(EventProducerConsumerType generator, String taskVertexName, @Nullable String edgeVertexName, @Nullable TezTaskAttemptID taskAttemptID) { - checkNotNull(generator, "generator is null"); - checkNotNull(taskVertexName, "taskVertexName is null"); + Objects.requireNonNull(generator, "generator is null"); + Objects.requireNonNull(taskVertexName, "taskVertexName is null"); this.producerConsumerType = generator; - this.taskVertexName = StringInterner.weakIntern(taskVertexName); - this.edgeVertexName = StringInterner.weakIntern(edgeVertexName); + this.taskVertexName = StringInterner.intern(taskVertexName); + this.edgeVertexName = StringInterner.intern(edgeVertexName); this.taskAttemptID = taskAttemptID; } @@ -82,6 +84,7 @@ public EventProducerConsumerType getEventGenerator() { return producerConsumerType; } + @Override public TezTaskAttemptID getTaskAttemptID() { return taskAttemptID; } @@ -121,10 +124,10 @@ public void write(DataOutput out) throws IOException { public void readFields(DataInput in) throws IOException { producerConsumerType = EventProducerConsumerType.values()[in.readInt()]; if (in.readBoolean()) { - taskVertexName = StringInterner.weakIntern(in.readUTF()); + taskVertexName = StringInterner.intern(in.readUTF()); } if (in.readBoolean()) { - edgeVertexName = StringInterner.weakIntern(in.readUTF()); + edgeVertexName = StringInterner.intern(in.readUTF()); } if (in.readBoolean()) { taskAttemptID = TezTaskAttemptID.readTezTaskAttemptID(in); diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/EventType.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/EventType.java index e573526671..7e365b1870 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/EventType.java +++ 
b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/EventType.java @@ -31,4 +31,5 @@ public enum EventType { COMPOSITE_DATA_MOVEMENT_EVENT, ROOT_INPUT_INITIALIZER_EVENT, COMPOSITE_ROUTED_DATA_MOVEMENT_EVENT, + CUSTOM_PROCESSOR_EVENT, } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/GroupInputSpec.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/GroupInputSpec.java index 83c80bd9f6..0177bc84fa 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/GroupInputSpec.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/GroupInputSpec.java @@ -25,8 +25,8 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; -import org.apache.hadoop.util.StringInterner; import org.apache.tez.dag.api.InputDescriptor; +import org.apache.tez.util.StringInterner; import com.google.common.collect.Lists; @@ -53,7 +53,7 @@ public InputDescriptor getMergedInputDescriptor() { } public GroupInputSpec(String groupName, List groupVertices, InputDescriptor inputDescriptor) { - this.groupName = StringInterner.weakIntern(groupName); + this.groupName = StringInterner.intern(groupName); this.groupVertices = groupVertices; this.mergedInputDescriptor = inputDescriptor; } @@ -70,11 +70,11 @@ public void write(DataOutput out) throws IOException { @Override public void readFields(DataInput in) throws IOException { - groupName = StringInterner.weakIntern(Text.readString(in)); + groupName = StringInterner.intern(Text.readString(in)); int numMembers = in.readInt(); groupVertices = Lists.newArrayListWithCapacity(numMembers); for (int i=0; i<numMembers; i++) { - groupVertices.add(StringInterner.weakIntern(Text.readString(in))); + groupVertices.add(StringInterner.intern(Text.readString(in))); } } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TaskSpec.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TaskSpec.java --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TaskSpec.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TaskSpec.java @@ public TaskSpec(String dagName, String vertexName, int vertexParallelism, ProcessorDescriptor processorDescriptor, List inputSpecList, List outputSpecList, @Nullable List groupInputSpecList, Configuration taskConf) { - checkNotNull(dagName, "dagName is null"); - checkNotNull(vertexName, "vertexName is null"); - checkNotNull(processorDescriptor, "processorDescriptor is null"); - checkNotNull(inputSpecList, "inputSpecList is null"); - checkNotNull(outputSpecList, "outputSpecList is null"); + Objects.requireNonNull(dagName, "dagName is null"); + Objects.requireNonNull(vertexName, "vertexName is null"); + Objects.requireNonNull(processorDescriptor, "processorDescriptor is null"); + Objects.requireNonNull(inputSpecList, "inputSpecList is null"); + Objects.requireNonNull(outputSpecList, "outputSpecList is null"); this.taskAttemptId = null; - this.dagName = StringInterner.weakIntern(dagName); - this.vertexName = StringInterner.weakIntern(vertexName); + this.dagName = StringInterner.intern(dagName); + this.vertexName = StringInterner.intern(vertexName); this.processorDescriptor = processorDescriptor; this.inputSpecList = inputSpecList; this.outputSpecList = outputSpecList; @@ -105,15 +107,15 @@ public TaskSpec(TezTaskAttemptID taskAttemptID, ProcessorDescriptor processorDescriptor, List inputSpecList, List outputSpecList, @Nullable List groupInputSpecList, Configuration taskConf) { - checkNotNull(taskAttemptID, "taskAttemptID is null"); - checkNotNull(dagName, "dagName is null"); - checkNotNull(vertexName, "vertexName is null"); - checkNotNull(processorDescriptor, "processorDescriptor is null"); - checkNotNull(inputSpecList, "inputSpecList is null"); - checkNotNull(outputSpecList, "outputSpecList is null"); + Objects.requireNonNull(taskAttemptID, "taskAttemptID is null"); + Objects.requireNonNull(dagName, "dagName is null"); + Objects.requireNonNull(vertexName, "vertexName is null"); + Objects.requireNonNull(processorDescriptor, "processorDescriptor is null"); + 
Objects.requireNonNull(inputSpecList, "inputSpecList is null"); + Objects.requireNonNull(outputSpecList, "outputSpecList is null"); this.taskAttemptId = taskAttemptID; - this.dagName = StringInterner.weakIntern(dagName); - this.vertexName = StringInterner.weakIntern(vertexName); + this.dagName = StringInterner.intern(dagName); + this.vertexName = StringInterner.intern(vertexName); this.processorDescriptor = processorDescriptor; this.inputSpecList = inputSpecList; this.outputSpecList = outputSpecList; @@ -127,7 +129,7 @@ public String getDAGName() { } public int getDagIdentifier() { - return taskAttemptId.getTaskID().getVertexID().getDAGId().getId(); + return taskAttemptId.getDAGID().getId(); } public int getVertexParallelism() { @@ -138,6 +140,7 @@ public String getVertexName() { return vertexName; } + @Override public TezTaskAttemptID getTaskAttemptID() { return taskAttemptId; } @@ -197,8 +200,8 @@ public void write(DataOutput out) throws IOException { @Override public void readFields(DataInput in) throws IOException { taskAttemptId = TezTaskAttemptID.readTezTaskAttemptID(in); - dagName = StringInterner.weakIntern(in.readUTF()); - vertexName = StringInterner.weakIntern(in.readUTF()); + dagName = StringInterner.intern(in.readUTF()); + vertexName = StringInterner.intern(in.readUTF()); vertexParallelism = in.readInt(); // TODO TEZ-305 convert this to PB processorDescriptor = new ProcessorDescriptor(); @@ -265,5 +268,4 @@ public String toString() { } return sb.toString(); } - } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TaskStatistics.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TaskStatistics.java index 0b4bef8280..961c28ef60 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TaskStatistics.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TaskStatistics.java @@ -21,12 +21,13 @@ import java.io.DataOutput; import java.io.IOException; import java.util.Map; +import java.util.Objects; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; -import org.apache.hadoop.util.StringInterner; -import com.google.common.base.Preconditions; +import org.apache.tez.util.StringInterner; + import com.google.common.collect.Maps; public class TaskStatistics implements Writable { @@ -39,8 +40,8 @@ public void addIO(String edgeName) { } public void addIO(String edgeName, IOStatistics stats) { - Preconditions.checkArgument(stats != null, edgeName); - ioStatistics.put(StringInterner.weakIntern(edgeName), stats); + Objects.requireNonNull(stats, edgeName); + ioStatistics.put(StringInterner.intern(edgeName), stats); } public Map getIOStatistics() { diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezEvent.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezEvent.java index 1a90ada78b..f96a437a49 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezEvent.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezEvent.java @@ -30,6 +30,7 @@ import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.runtime.api.Event; import org.apache.tez.runtime.api.events.CompositeDataMovementEvent; +import org.apache.tez.runtime.api.events.CustomProcessorEvent; import org.apache.tez.runtime.api.events.DataMovementEvent; import org.apache.tez.runtime.api.events.CompositeRoutedDataMovementEvent; import org.apache.tez.runtime.api.events.EventProtos; @@ -57,6 +58,8 @@ 
import com.google.protobuf.CodedInputStream; import com.google.protobuf.CodedOutputStream; +import static org.apache.tez.runtime.api.events.EventProtos.*; + public class TezEvent implements Writable { private EventType eventType; @@ -82,6 +85,8 @@ public TezEvent(Event event, EventMetaData sourceInfo, long time) { this.setSourceInfo(sourceInfo); if (event instanceof DataMovementEvent) { eventType = EventType.DATA_MOVEMENT_EVENT; + } else if (event instanceof CustomProcessorEvent) { + eventType = EventType.CUSTOM_PROCESSOR_EVENT; } else if (event instanceof CompositeDataMovementEvent) { eventType = EventType.COMPOSITE_DATA_MOVEMENT_EVENT; } else if (event instanceof CompositeRoutedDataMovementEvent) { @@ -157,6 +162,11 @@ private void serializeEvent(DataOutput out) throws IOException { } else { AbstractMessage message; switch (eventType) { + case CUSTOM_PROCESSOR_EVENT: + message = + ProtoConverters.convertCustomProcessorEventToProto( + (CustomProcessorEvent) event); + break; case DATA_MOVEMENT_EVENT: message = ProtoConverters.convertDataMovementEventToProto( @@ -181,6 +191,9 @@ private void serializeEvent(DataOutput out) throws IOException { .setIndex(ideEvt.getIndex()) .setDiagnostics(ideEvt.getDiagnostics()) .setVersion(ideEvt.getVersion()) + .setIsLocalFetch(ideEvt.isLocalFetch()) + .setIsDiskErrorAtSource(ideEvt.isDiskErrorAtSource()) + .setDestinationLocalhostName(ideEvt.getDestinationLocalhostName()) .build(); break; case TASK_ATTEMPT_FAILED_EVENT: @@ -260,6 +273,11 @@ private void deserializeEvent(DataInput in) throws IOException { } input = CodedInputStream.newInstance(eventBytes, startOffset, eventBytesLen); switch (eventType) { + case CUSTOM_PROCESSOR_EVENT: + CustomProcessorEventProto cpProto = + CustomProcessorEventProto.parseFrom(input); + event = ProtoConverters.convertCustomProcessorEventFromProto(cpProto); + break; case DATA_MOVEMENT_EVENT: DataMovementEventProto dmProto = DataMovementEventProto.parseFrom(input); @@ -279,10 +297,10 @@ private void deserializeEvent(DataInput in) throws IOException { event = ProtoConverters.convertVertexManagerEventFromProto(vmProto); break; case INPUT_READ_ERROR_EVENT: - InputReadErrorEventProto ideProto = - InputReadErrorEventProto.parseFrom(input); - event = InputReadErrorEvent.create(ideProto.getDiagnostics(), - ideProto.getIndex(), ideProto.getVersion()); + InputReadErrorEventProto ideProto = InputReadErrorEventProto.parseFrom(input); + event = InputReadErrorEvent.create(ideProto.getDiagnostics(), ideProto.getIndex(), + ideProto.getVersion(), ideProto.getIsLocalFetch(), ideProto.getIsDiskErrorAtSource(), + ideProto.getDestinationLocalhostName()); break; case TASK_ATTEMPT_FAILED_EVENT: TaskAttemptFailedEventProto tfProto = diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatRequest.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatRequest.java index 7ed89f813b..fd5bc17521 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatRequest.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezHeartbeatRequest.java @@ -39,13 +39,14 @@ public class TezHeartbeatRequest implements Writable { private int preRoutedStartIndex; private int maxEvents; private long requestId; + private long usedMemory; public TezHeartbeatRequest() { } public TezHeartbeatRequest(long requestId, List events, int preRoutedStartIndex, String containerIdentifier, - TezTaskAttemptID taskAttemptID, int startIndex, int maxEvents) { + 
TezTaskAttemptID taskAttemptID, int startIndex, int maxEvents, long usedMemory) { this.containerIdentifier = containerIdentifier; this.requestId = requestId; this.events = Collections.unmodifiableList(events); @@ -53,6 +54,7 @@ public TezHeartbeatRequest(long requestId, List events, this.preRoutedStartIndex = preRoutedStartIndex; this.maxEvents = maxEvents; this.currentTaskAttemptID = taskAttemptID; + this.usedMemory = usedMemory; } public String getContainerIdentifier() { @@ -83,6 +85,10 @@ public TezTaskAttemptID getCurrentTaskAttemptID() { return currentTaskAttemptID; } + public long getUsedMemory() { + return usedMemory; + } + @Override public void write(DataOutput out) throws IOException { if (events != null) { @@ -105,6 +111,7 @@ public void write(DataOutput out) throws IOException { out.writeInt(maxEvents); out.writeLong(requestId); Text.writeString(out, containerIdentifier); + out.writeLong(usedMemory); } @Override @@ -128,6 +135,7 @@ public void readFields(DataInput in) throws IOException { maxEvents = in.readInt(); requestId = in.readLong(); containerIdentifier = Text.readString(in); + usedMemory = in.readLong(); } @Override @@ -140,6 +148,7 @@ public String toString() { + ", maxEventsToGet=" + maxEvents + ", taskAttemptId=" + currentTaskAttemptID + ", eventCount=" + (events != null ? events.size() : 0) + + ", usedMemory=" + usedMemory + " }"; } } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java index 15a6485791..1c1c10bf86 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezInputContextImpl.java @@ -18,15 +18,12 @@ package org.apache.tez.runtime.api.impl; -import com.google.common.base.Preconditions; - -import static com.google.common.base.Preconditions.checkNotNull; - import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Objects; import javax.annotation.Nullable; @@ -102,10 +99,10 @@ vertexParallelism, taskAttemptID, wrapCounters(runtimeTask, taskVertexName, sourceVertexName, conf), runtimeTask, tezUmbilical, serviceConsumerMetadata, auxServiceEnv, memDist, inputDescriptor, objectRegistry, ExecutionContext, memAvailable, sharedExecutor); - checkNotNull(inputIndex, "inputIndex is null"); - checkNotNull(sourceVertexName, "sourceVertexName is null"); - checkNotNull(inputs, "input map is null"); - checkNotNull(inputReadyTracker, "inputReadyTracker is null"); + Objects.requireNonNull(inputIndex, "inputIndex is null"); + Objects.requireNonNull(sourceVertexName, "sourceVertexName is null"); + Objects.requireNonNull(inputs, "input map is null"); + Objects.requireNonNull(inputReadyTracker, "inputReadyTracker is null"); this.userPayload = userPayload; this.inputIndex = inputIndex; this.sourceVertexName = sourceVertexName; @@ -131,7 +128,7 @@ private static TezCounters wrapCounters(LogicalIOProcessorRuntimeTask task, Stri @Override public void sendEvents(List events) { - Preconditions.checkNotNull(events, "events are null"); + Objects.requireNonNull(events, "events are null"); List tezEvents = new ArrayList(events.size()); for (Event e : events) { TezEvent tEvt = new TezEvent(e, sourceInfo); @@ -155,6 +152,11 @@ public String getSourceVertexName() { return sourceVertexName; } + @Override + public String 
getInputOutputVertexNames() { + return String.format("%s -> %s", getSourceVertexName(), getTaskVertexName()); + } + @Override public void fatalError(Throwable exception, String message) { super.signalFatalError(exception, message, sourceInfo); @@ -190,8 +192,6 @@ public void close() throws IOException { super.close(); this.userPayload = null; this.inputReadyTracker = null; - if (LOG.isDebugEnabled()) { - LOG.debug("Cleared TezInputContextImpl related information"); - } + LOG.debug("Cleared TezInputContextImpl related information"); } } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezMergedInputContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezMergedInputContextImpl.java index e35e3325f1..553efcd467 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezMergedInputContextImpl.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezMergedInputContextImpl.java @@ -18,12 +18,13 @@ package org.apache.tez.runtime.api.impl; -import static com.google.common.base.Preconditions.checkNotNull; + import javax.annotation.Nullable; import java.util.Arrays; import java.util.Map; +import java.util.Objects; import org.apache.tez.dag.api.UserPayload; import org.apache.tez.runtime.InputReadyTracker; @@ -45,9 +46,9 @@ public TezMergedInputContextImpl(@Nullable UserPayload userPayload, String group Map groupInputsMap, InputReadyTracker inputReadyTracker, String[] workDirs, LogicalIOProcessorRuntimeTask runtimeTask) { - checkNotNull(groupInputName, "groupInputName is null"); - checkNotNull(groupInputsMap, "input-group map is null"); - checkNotNull(inputReadyTracker, "inputReadyTracker is null"); + Objects.requireNonNull(groupInputName, "groupInputName is null"); + Objects.requireNonNull(groupInputsMap, "input-group map is null"); + Objects.requireNonNull(inputReadyTracker, "inputReadyTracker is null"); this.groupInputName = groupInputName; this.groupInputsMap = groupInputsMap; this.userPayload = userPayload; diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java index 41e8d41fd0..a17bc8900d 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezOutputContextImpl.java @@ -18,21 +18,20 @@ package org.apache.tez.runtime.api.impl; -import com.google.common.base.Preconditions; - -import static com.google.common.base.Preconditions.checkNotNull; - import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Objects; import javax.annotation.Nullable; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.event.EventHandler; import org.apache.tez.common.TezExecutors; +import org.apache.tez.runtime.internals.api.TezTrapEvent; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.OutputDescriptor; import org.apache.tez.dag.api.TezConfiguration; @@ -50,17 +49,28 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +@SuppressWarnings("unchecked") public class TezOutputContextImpl extends TezTaskContextImpl implements OutputContext { private static final Logger LOG = LoggerFactory.getLogger(TezOutputContextImpl.class); 
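// Event trapping: after trapEvents(...) below is called, sendEvents(...)
// redirects outgoing events to the registered handler instead of the task
// umbilical.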
private volatile UserPayload userPayload; + + /** + * Holds whether we can accept more events to send to the AM. + */ + private volatile boolean trapEvents; private final String destinationVertexName; private final EventMetaData sourceInfo; private final int outputIndex; private final OutputStatisticsReporterImpl statsReporter; + /** + * Handler for the events after the trap flag is set. + */ + private EventHandler trapEventHandler; + class OutputStatisticsReporterImpl implements OutputStatisticsReporter { @Override @@ -74,7 +84,7 @@ public synchronized void reportDataSize(long size) { public void reportItemsProcessed(long items) { // this is a concurrent map. Plus we are not adding/deleting entries runtimeTask.getTaskStatistics().getIOStatistics().get(destinationVertexName) - .setItemsProcessed(items);; + .setItemsProcessed(items); } } @@ -97,8 +107,8 @@ public TezOutputContextImpl(Configuration conf, String[] workDirs, int appAttemp runtimeTask, tezUmbilical, serviceConsumerMetadata, auxServiceEnv, memDist, outputDescriptor, objectRegistry, executionContext, memAvailable, sharedExecutor); - checkNotNull(outputIndex, "outputIndex is null"); - checkNotNull(destinationVertexName, "destinationVertexName is null"); + Objects.requireNonNull(outputIndex, "outputIndex is null"); + Objects.requireNonNull(destinationVertexName, "destinationVertexName is null"); this.userPayload = userPayload; this.outputIndex = outputIndex; this.destinationVertexName = destinationVertexName; @@ -121,13 +131,17 @@ private static TezCounters wrapCounters(LogicalIOProcessorRuntimeTask runtimeTas @Override public void sendEvents(List events) { - Preconditions.checkNotNull(events, "events are null"); + Objects.requireNonNull(events, "events are null"); List tezEvents = new ArrayList(events.size()); for (Event e : events) { TezEvent tEvt = new TezEvent(e, sourceInfo); tezEvents.add(tEvt); } - tezUmbilical.addEvents(tezEvents); + if (trapEvents) { + trapEventHandler.handle(new TezTrapEvent(tezEvents)); + } else { + tezUmbilical.addEvents(tezEvents); + } } @Override @@ -140,6 +154,12 @@ public String getDestinationVertexName() { return destinationVertexName; } + + @Override + public String getInputOutputVertexNames() { + return String.format("%s -> %s", getTaskVertexName(), getDestinationVertexName()); + } + @Override public void fatalError(Throwable exception, String message) { super.signalFatalError(exception, message, sourceInfo); @@ -166,12 +186,19 @@ public OutputStatisticsReporter getStatisticsReporter() { return statsReporter; } + /** + * This will monitor some of the events that will be sent. 
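+ * Once the trap is set, subsequent sendEvents(List) calls deliver a
+ * TezTrapEvent carrying the pending events to the supplied handler instead
+ * of sending them to the AM. Illustrative usage (names hypothetical):
+ * outputContext.trapEvents(trapEvent -> trapped.addAll(trapEvent.getTezEvents()));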
+ */ + @Override + public final void trapEvents(final EventHandler eventHandler) { + trapEvents = true; + this.trapEventHandler = eventHandler; + } + @Override public void close() throws IOException { super.close(); this.userPayload = null; - if (LOG.isDebugEnabled()) { - LOG.debug("Cleared TezOutputContextImpl related information"); - } + LOG.debug("Cleared TezOutputContextImpl related information"); } } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java index beae693053..e09aa8377e 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezProcessorContextImpl.java @@ -18,16 +18,13 @@ package org.apache.tez.runtime.api.impl; -import com.google.common.base.Preconditions; - -import static com.google.common.base.Preconditions.checkNotNull; - import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Objects; import javax.annotation.Nullable; @@ -69,7 +66,7 @@ public TezProcessorContextImpl(Configuration conf, String[] workDirs, int appAtt runtimeTask.addAndGetTezCounter(vertexName), runtimeTask, tezUmbilical, serviceConsumerMetadata, auxServiceEnv, memDist, processorDescriptor, objectRegistry, ExecutionContext, memAvailable, sharedExecutor); - checkNotNull(inputReadyTracker, "inputReadyTracker is null"); + Objects.requireNonNull(inputReadyTracker, "inputReadyTracker is null"); this.userPayload = userPayload; this.sourceInfo = new EventMetaData(EventProducerConsumerType.PROCESSOR, taskVertexName, "", taskAttemptID); @@ -78,7 +75,7 @@ public TezProcessorContextImpl(Configuration conf, String[] workDirs, int appAtt @Override public void sendEvents(List events) { - Preconditions.checkNotNull(events, "events are null"); + Objects.requireNonNull(events, "events are null"); List tezEvents = new ArrayList(events.size()); for (Event e : events) { TezEvent tEvt = new TezEvent(e, sourceInfo); @@ -93,8 +90,8 @@ public UserPayload getUserPayload() { } @Override - public void setProgress(float progress) { - if (Math.abs(progress - runtimeTask.getProgress()) >= 0.001f) { + public void setProgressInternally(float progress) { + if (Float.compare(progress, runtimeTask.getProgress()) != 0) { runtimeTask.setProgress(progress); notifyProgress(); } @@ -146,9 +143,7 @@ public void close() throws IOException { super.close(); this.userPayload = null; this.inputReadyTracker = null; - if (LOG.isDebugEnabled()) { - LOG.debug("Cleared TezProcessorContextImpl related information"); - } + LOG.debug("Cleared TezProcessorContextImpl related information"); } } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java index 5a6a405608..b645346654 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/api/impl/TezTaskContextImpl.java @@ -18,7 +18,7 @@ package org.apache.tez.runtime.api.impl; -import static com.google.common.base.Preconditions.checkNotNull; + import java.io.Closeable; import java.io.IOException; @@ -27,6 +27,7 @@ import java.util.Map; import 
java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicInteger; +import java.util.Objects; import javax.annotation.Nullable; @@ -46,7 +47,7 @@ import org.apache.tez.runtime.api.TaskContext; import org.apache.tez.runtime.common.resources.MemoryDistributor; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; public abstract class TezTaskContextImpl implements TaskContext, Closeable { @@ -55,6 +56,7 @@ public abstract class TezTaskContextImpl implements TaskContext, Closeable { protected final String taskVertexName; protected final TezTaskAttemptID taskAttemptID; private final TezCounters counters; + private Configuration configuration; private String[] workDirs; private String uniqueIdentifier; protected final LogicalIOProcessorRuntimeTask runtimeTask; @@ -79,17 +81,18 @@ public TezTaskContextImpl(Configuration conf, String[] workDirs, int appAttemptN Map auxServiceEnv, MemoryDistributor memDist, EntityDescriptor descriptor, ObjectRegistry objectRegistry, ExecutionContext ExecutionContext, long memAvailable, TezExecutors sharedExecutor) { - checkNotNull(conf, "conf is null"); - checkNotNull(dagName, "dagName is null"); - checkNotNull(taskVertexName, "taskVertexName is null"); - checkNotNull(taskAttemptID, "taskAttemptId is null"); - checkNotNull(counters, "counters is null"); - checkNotNull(runtimeTask, "runtimeTask is null"); - checkNotNull(auxServiceEnv, "auxServiceEnv is null"); - checkNotNull(memDist, "memDist is null"); - checkNotNull(descriptor, "descriptor is null"); - checkNotNull(sharedExecutor, "sharedExecutor is null"); + Objects.requireNonNull(conf, "conf is null"); + Objects.requireNonNull(dagName, "dagName is null"); + Objects.requireNonNull(taskVertexName, "taskVertexName is null"); + Objects.requireNonNull(taskAttemptID, "taskAttemptId is null"); + Objects.requireNonNull(counters, "counters is null"); + Objects.requireNonNull(runtimeTask, "runtimeTask is null"); + Objects.requireNonNull(auxServiceEnv, "auxServiceEnv is null"); + Objects.requireNonNull(memDist, "memDist is null"); + Objects.requireNonNull(descriptor, "descriptor is null"); + Objects.requireNonNull(sharedExecutor, "sharedExecutor is null"); this.dagName = dagName; + this.configuration = conf; this.taskVertexName = taskVertexName; this.taskAttemptID = taskAttemptID; this.counters = counters; @@ -115,8 +118,7 @@ public TezTaskContextImpl(Configuration conf, String[] workDirs, int appAttemptN @Override public ApplicationId getApplicationId() { - return taskAttemptID.getTaskID().getVertexID().getDAGId() - .getApplicationId(); + return taskAttemptID.getApplicationId(); } @Override @@ -134,6 +136,11 @@ public int getTaskAttemptNumber() { return taskAttemptID.getId(); } + @Override + public Configuration getContainerConfiguration() { + return configuration; + } + @Override public String getDAGName() { return dagName; @@ -146,12 +153,12 @@ public String getTaskVertexName() { @Override public int getTaskVertexIndex() { - return taskAttemptID.getTaskID().getVertexID().getId(); + return taskAttemptID.getVertexID().getId(); } @Override public int getDagIdentifier() { - return taskAttemptID.getTaskID().getVertexID().getDAGId().getId(); + return taskAttemptID.getDAGID().getId(); } @Override @@ -193,7 +200,7 @@ public ByteBuffer getServiceConsumerMetaData(String serviceName) { @Nullable @Override public ByteBuffer getServiceProviderMetaData(String serviceName) { - Preconditions.checkNotNull(serviceName, "serviceName is null"); + Objects.requireNonNull(serviceName, 
"serviceName is null"); return AuxiliaryServiceHelper.getServiceDataFromEnv( serviceName, auxServiceEnv); } @@ -225,7 +232,7 @@ protected void signalFatalError(Throwable t, String message, EventMetaData sourc protected void signalFailure(TaskFailureType taskFailureType, Throwable t, String message, EventMetaData sourceInfo) { - Preconditions.checkNotNull(taskFailureType, "TaskFailureType must be specified"); + Objects.requireNonNull(taskFailureType, "TaskFailureType must be specified"); runtimeTask.setFrameworkCounters(); runtimeTask.registerError(); tezUmbilical.signalFailure(taskAttemptID, taskFailureType, t, message, sourceInfo); diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/common/resources/MemoryDistributor.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/common/resources/MemoryDistributor.java index e63a414cf7..d8ba1f71ff 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/common/resources/MemoryDistributor.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/common/resources/MemoryDistributor.java @@ -25,6 +25,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; +import java.util.Objects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,7 +44,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Iterables; // Not calling this a MemoryManager explicitly. Not yet anyway. @@ -175,9 +176,9 @@ void setJvmMemory(long size) { private long registerRequest(long requestSize, MemoryUpdateCallback callback, TaskContext entityContext, EntityDescriptor descriptor) { Preconditions.checkArgument(requestSize >= 0); - Preconditions.checkNotNull(callback); - Preconditions.checkNotNull(entityContext); - Preconditions.checkNotNull(descriptor); + Objects.requireNonNull(callback); + Objects.requireNonNull(entityContext); + Objects.requireNonNull(descriptor); if (!dupSet.add(entityContext)) { throw new TezUncheckedException( "A single entity can only make one call to request resources for now"); @@ -207,7 +208,7 @@ private long registerRequest(long requestSize, MemoryUpdateCallback callback, } private void validateAllocations(Iterable allocations, int numRequestors) { - Preconditions.checkNotNull(allocations); + Objects.requireNonNull(allocations); long totalAllocated = 0l; int numAllocations = 0; for (Long l : allocations) { diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/common/resources/ScalingAllocator.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/common/resources/ScalingAllocator.java index aebb19be36..e045abd9d2 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/common/resources/ScalingAllocator.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/common/resources/ScalingAllocator.java @@ -29,7 +29,7 @@ import org.apache.tez.dag.api.TezConfiguration; import com.google.common.base.Function; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -80,16 +80,11 @@ public Long apply(InitialMemoryRequestContext requestContext) { long requestedSize = request.getRequestedSize(); if (requestedSize == 0) { allocations.add(0l); - if (LOG.isDebugEnabled()) { - LOG.debug("Scaling 
requested: 0 to allocated: 0"); - } + LOG.debug("Scaling requested: 0 to allocated: 0"); } else { long allocated = (long) ((requestedSize / (double) totalRequested) * availableForAllocation); allocations.add(allocated); - if (LOG.isDebugEnabled()) { - LOG.debug("Scaling requested: " + requestedSize + " to allocated: " + allocated); - } - + LOG.debug("Scaling requested: {} to allocated: {}", requestedSize, allocated); } } return allocations; diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java index b7d5fb5885..6e8950c86b 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TaskReporterInterface.java @@ -39,7 +39,7 @@ boolean taskFailed(TezTaskAttemptID taskAttemptId, String diagnostics, EventMetaData srcMeta) throws IOException, TezException; - boolean taskKilled(TezTaskAttemptID taskAttemtpId, Throwable cause, String diagnostics, + boolean taskKilled(TezTaskAttemptID taskAttemptId, Throwable cause, String diagnostics, EventMetaData srcMeta) throws IOException, TezException; void addEvents(TezTaskAttemptID taskAttemptId, Collection events); diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TezTrapEvent.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TezTrapEvent.java new file mode 100644 index 0000000000..8c63b74007 --- /dev/null +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TezTrapEvent.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.runtime.internals.api; + +import org.apache.tez.common.TezAbstractEvent; +import org.apache.tez.runtime.api.impl.TezEvent; + +import java.util.List; + +import static org.apache.tez.runtime.internals.api.TezTrapEventType.TRAP_EVENT_TYPE; + +/** + * Event sent when no more events should be sent to the AM. + */ +public class TezTrapEvent extends TezAbstractEvent { + /** + * Events that were reported. + */ + private final List tezEvents; + + /** + * Create a tez trap event. + * @param events the events that were to be sent to the AM. + */ + public TezTrapEvent(final List events) { + super(TRAP_EVENT_TYPE); + this.tezEvents = events; + } + + /** + * @return events.
+ */ + public final List getTezEvents() { + return tezEvents; + } +} \ No newline at end of file diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TezTrapEventType.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TezTrapEventType.java new file mode 100644 index 0000000000..89cb78ea06 --- /dev/null +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/internals/api/TezTrapEventType.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.runtime.internals.api; + +/** + * Enum type with only one value representing this event. + */ +public enum TezTrapEventType { + /** + * Single value for this event type. + */ + TRAP_EVENT_TYPE +} diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/FileSystemStatisticUpdater.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/FileSystemStatisticUpdater.java index bb15ef159f..ad48d0d624 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/FileSystemStatisticUpdater.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/FileSystemStatisticUpdater.java @@ -5,9 +5,9 @@ * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the @@ -17,9 +17,7 @@ package org.apache.tez.runtime.metrics; -import java.util.List; - -import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.StorageStatistics; import org.apache.tez.common.counters.FileSystemCounter; import org.apache.tez.common.counters.TezCounter; import org.apache.tez.common.counters.TezCounters; @@ -30,50 +28,22 @@ */ public class FileSystemStatisticUpdater { - private List stats; - private TezCounter readBytesCounter, writeBytesCounter, readOpsCounter, largeReadOpsCounter, - writeOpsCounter; - private String scheme; - private TezCounters counters; + private final StorageStatistics stats; + private final TezCounters counters; - FileSystemStatisticUpdater(TezCounters counters, List stats, String scheme) { - this.stats = stats; - this.scheme = scheme; + FileSystemStatisticUpdater(TezCounters counters, StorageStatistics storageStatistics) { + this.stats = storageStatistics; this.counters = counters; } void updateCounters() { - if (readBytesCounter == null) { - readBytesCounter = counters.findCounter(scheme, FileSystemCounter.BYTES_READ); - } - if (writeBytesCounter == null) { - writeBytesCounter = counters.findCounter(scheme, FileSystemCounter.BYTES_WRITTEN); - } - if (readOpsCounter == null) { - readOpsCounter = counters.findCounter(scheme, FileSystemCounter.READ_OPS); - } - if (largeReadOpsCounter == null) { - largeReadOpsCounter = counters.findCounter(scheme, FileSystemCounter.LARGE_READ_OPS); - } - if (writeOpsCounter == null) { - writeOpsCounter = counters.findCounter(scheme, FileSystemCounter.WRITE_OPS); - } - long readBytes = 0; - long writeBytes = 0; - long readOps = 0; - long largeReadOps = 0; - long writeOps = 0; - for (FileSystem.Statistics stat : stats) { - readBytes = readBytes + stat.getBytesRead(); - writeBytes = writeBytes + stat.getBytesWritten(); - readOps = readOps + stat.getReadOps(); - largeReadOps = largeReadOps + stat.getLargeReadOps(); - writeOps = writeOps + stat.getWriteOps(); + // loop through FileSystemCounter enums as it is a smaller set + for (FileSystemCounter fsCounter : FileSystemCounter.values()) { + Long val = stats.getLong(fsCounter.getOpName()); + if (val != null && val != 0) { + TezCounter counter = counters.findCounter(stats.getScheme(), fsCounter); + counter.setValue(val); + } } - readBytesCounter.setValue(readBytes); - writeBytesCounter.setValue(writeBytes); - readOpsCounter.setValue(readOps); - largeReadOpsCounter.setValue(largeReadOps); - writeOpsCounter.setValue(writeOps); } } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/TaskCounterUpdater.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/TaskCounterUpdater.java index 48676e225b..49f8fca25f 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/TaskCounterUpdater.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/TaskCounterUpdater.java @@ -18,17 +18,17 @@ package org.apache.tez.runtime.metrics; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; +import java.util.Iterator; import java.util.Map; +import org.apache.hadoop.fs.GlobalStorageStatistics; +import org.apache.hadoop.fs.StorageStatistics; import org.apache.tez.util.TezMxBeanResourceCalculator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; import 
org.apache.tez.common.GcTimeUpdater; import org.apache.tez.common.counters.TaskCounter; @@ -49,10 +49,9 @@ public class TaskCounterUpdater { private final Configuration conf; /** - * A Map where Key-> URIScheme and value->FileSystemStatisticUpdater + * A Map where Key-> URIScheme and value->Map<String, FileSystemStatisticUpdater> */ - private Map<String, FileSystemStatisticUpdater> statisticUpdaters = - new HashMap<String, FileSystemStatisticUpdater>(); + private final Map<String, Map<String, FileSystemStatisticUpdater>> statisticUpdaters = new HashMap<>(); protected final GcTimeUpdater gcUpdater; private ResourceCalculatorProcessTree pTree; private long initCpuCumulativeTime = 0; @@ -67,34 +66,18 @@ public TaskCounterUpdater(TezCounters counters, Configuration conf, String pid) recordInitialCpuStats(); } - + public void updateCounters() { - // FileSystemStatistics are reset each time a new task is seen by the - // container. - // This doesn't remove the fileSystem, and does not clear all statistics - - // so there is a potential of an unused FileSystem showing up for a - // Container, and strange values for READ_OPS etc. - Map<String, List<Statistics>> map = new - HashMap<String, List<Statistics>>(); - for(Statistics stat: FileSystem.getAllStatistics()) { - String uriScheme = stat.getScheme(); - if (map.containsKey(uriScheme)) { - List<Statistics> list = map.get(uriScheme); - list.add(stat); - } else { - List<Statistics> list = new ArrayList<Statistics>(); - list.add(stat); - map.put(uriScheme, list); - } - } - for (Map.Entry<String, List<Statistics>> entry: map.entrySet()) { - FileSystemStatisticUpdater updater = statisticUpdaters.get(entry.getKey()); - if(updater==null) {//new FileSystem has been found in the cache - updater = - new FileSystemStatisticUpdater(tezCounters, entry.getValue(), - entry.getKey()); - statisticUpdaters.put(entry.getKey(), updater); - } + GlobalStorageStatistics globalStorageStatistics = FileSystem.getGlobalStorageStatistics(); + Iterator<StorageStatistics> iter = globalStorageStatistics.iterator(); + while (iter.hasNext()) { + StorageStatistics stats = iter.next(); + // Fetch or initialize the updater set for the scheme + Map<String, FileSystemStatisticUpdater> updaterSet = statisticUpdaters + .computeIfAbsent(stats.getScheme(), k -> new HashMap<>()); + // Fetch or create the updater for the specific statistic + FileSystemStatisticUpdater updater = updaterSet + .computeIfAbsent(stats.getName(), k -> new FileSystemStatisticUpdater(tezCounters, stats)); updater.updateCounters(); } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/ContainerReporter.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/ContainerReporter.java index 0ece2271c5..828e948fed 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/ContainerReporter.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/ContainerReporter.java @@ -69,14 +69,15 @@ protected ContainerTask callInternal() throws Exception { + " ms after starting to poll." + " TaskInfo: shouldDie: " + containerTask.shouldDie() - + (containerTask.shouldDie() == true ? "" : ", currentTaskAttemptId: " - + containerTask.getTaskSpec().getTaskAttemptID())); + + (containerTask.shouldDie() ? "" : ", currentTaskAttemptId: " + + (containerTask.getTaskSpec() == null ? "none" + : containerTask.getTaskSpec().getTaskAttemptID()))); return containerTask; } private void maybeLogSleepMessage(long sleepTimeMilliSecs) { long currentTime = System.currentTimeMillis(); - if (sleepTimeMilliSecs + currentTime > nextGetTaskPrintTime) { + if ((sleepTimeMilliSecs + currentTime) - nextGetTaskPrintTime > 0) { LOG.info("Sleeping for " + sleepTimeMilliSecs + "ms before retrying getTask again. Got null now.
" + "Next getTask sleep message after " + LOG_INTERVAL + "ms"); diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java index 809ce325ec..5b1a9544b1 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java @@ -19,6 +19,9 @@ package org.apache.tez.runtime.task; import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.net.InetAddress; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -35,11 +38,13 @@ import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.hadoop.util.ShutdownHookManager; +import org.apache.tez.common.GuavaShim; import org.apache.tez.common.TezTaskUmbilicalProtocol; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.runtime.RuntimeTask; +import org.apache.tez.runtime.RuntimeTask.LocalWriteLimitException; import org.apache.tez.runtime.api.*; import org.apache.tez.runtime.api.events.TaskAttemptCompletedEvent; import org.apache.tez.runtime.api.events.TaskAttemptFailedEvent; @@ -68,7 +73,7 @@ * Responsible for communication between tasks running in a Container and the ApplicationMaster. * Takes care of sending heartbeats (regular and OOB) to the AM - to send generated events, and to * retrieve events specific to this task. - * + * */ public class TaskReporter implements TaskReporterInterface { @@ -108,7 +113,7 @@ public synchronized void registerTask(RuntimeTask task, currentCallable = new HeartbeatCallable(task, umbilical, pollInterval, sendCounterInterval, maxEventsToGet, requestCounter, containerIdStr); ListenableFuture future = heartbeatExecutor.submit(currentCallable); - Futures.addCallback(future, new HeartbeatCallback(errorReporter)); + Futures.addCallback(future, new HeartbeatCallback(errorReporter), GuavaShim.directExecutor()); } /** @@ -135,6 +140,9 @@ static class HeartbeatCallable implements Callable { private static final int LOG_COUNTER_START_INTERVAL = 5000; // 5 seconds private static final float LOG_COUNTER_BACKOFF = 1.3f; + private static final int HEAP_MEMORY_USAGE_UPDATE_INTERVAL = 5000; // 5 seconds + + private static final int LOCAL_FILE_SYSTEM_BYTES_WRITTEN_CHECK_INTERVAL = 10000; // 10 seconds private final RuntimeTask task; private final EventMetaData updateEventMetadata; @@ -156,6 +164,13 @@ static class HeartbeatCallable implements Callable { private final ReentrantLock lock = new ReentrantLock(); private final Condition condition = lock.newCondition(); + private final MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean(); + private long usedMemory = 0; + private long heapMemoryUsageUpdatedTime = System.currentTimeMillis() - HEAP_MEMORY_USAGE_UPDATE_INTERVAL; + + private long localFileSystemBytesWrittenCheckInterval = + System.currentTimeMillis() - LOCAL_FILE_SYSTEM_BYTES_WRITTEN_CHECK_INTERVAL; + /* * Keeps track of regular timed heartbeats. Is primarily used as a timing mechanism to send / * log counters. 
@@ -163,7 +178,7 @@ static class HeartbeatCallable implements Callable { private AtomicInteger nonOobHeartbeatCounter = new AtomicInteger(0); private int nextHeartbeatNumToLog = 0; /* - * Tracks the last non-OOB heartbeat number at which counters were sent to the AM. + * Tracks the last non-OOB heartbeat number at which counters were sent to the AM. */ private int prevCounterSendHeartbeatNum = 0; @@ -253,6 +268,17 @@ private synchronized ResponseWrapper heartbeat(Collection eventsArg) t sendCounters = true; prevCounterSendHeartbeatNum = nonOobHeartbeatCounter.get(); } + try { + long now = System.currentTimeMillis(); + if (now - localFileSystemBytesWrittenCheckInterval > LOCAL_FILE_SYSTEM_BYTES_WRITTEN_CHECK_INTERVAL) { + task.checkTaskLimits(); + localFileSystemBytesWrittenCheckInterval = now; + } + } catch (LocalWriteLimitException lwle) { + LOG.error("Local FileSystem write limit exceeded", lwle); + askedToDie.set(true); + return new ResponseWrapper(true, 1); + } updateEvent = new TezEvent(getStatusUpdateEvent(sendCounters), updateEventMetadata); events.add(updateEvent); } @@ -262,17 +288,13 @@ private synchronized ResponseWrapper heartbeat(Collection eventsArg) t int fromPreRoutedEventId = task.getNextPreRoutedEventId(); int maxEvents = Math.min(maxEventsToGet, task.getMaxEventsToHandle()); TezHeartbeatRequest request = new TezHeartbeatRequest(requestId, events, fromPreRoutedEventId, - containerIdStr, task.getTaskAttemptID(), fromEventId, maxEvents); - if (LOG.isDebugEnabled()) { - LOG.debug("Sending heartbeat to AM, request=" + request); - } + containerIdStr, task.getTaskAttemptID(), fromEventId, maxEvents, getUsedMemory()); + LOG.debug("Sending heartbeat to AM, request={}", request); maybeLogCounters(); TezHeartbeatResponse response = umbilical.heartbeat(request); - if (LOG.isDebugEnabled()) { - LOG.debug("Received heartbeat response from AM, response=" + response); - } + LOG.debug("Received heartbeat response from AM, response={}", response); if (response.shouldDie()) { LOG.info("Received should die response from AM"); @@ -308,6 +330,15 @@ private synchronized ResponseWrapper heartbeat(Collection eventsArg) t return new ResponseWrapper(false, numEventsReceived); } + private long getUsedMemory() { + long now = System.currentTimeMillis(); + if (now - heapMemoryUsageUpdatedTime > HEAP_MEMORY_USAGE_UPDATE_INTERVAL) { + usedMemory = memoryMXBean.getHeapMemoryUsage().getUsed(); + heapMemoryUsageUpdatedTime = now; + } + return usedMemory; + } + public void markComplete() { // Notify to clear pending events, if any. 
lock.lock(); @@ -348,7 +379,7 @@ private boolean taskSucceeded(TezTaskAttemptID taskAttemptID) throws IOException return askedToDie.get(); } } - + @VisibleForTesting TaskStatusUpdateEvent getStatusUpdateEvent(boolean sendCounters) { TezCounters counters = null; @@ -388,9 +419,10 @@ private boolean taskTerminated(TezTaskAttemptID taskAttemptID, boolean isKilled, if (!finalEventQueued.getAndSet(true)) { List tezEvents = new ArrayList(); if (diagnostics == null) { - diagnostics = ExceptionUtils.getStackTrace(t); + diagnostics = "Node: " + InetAddress.getLocalHost() + " : " + ExceptionUtils.getStackTrace(t); } else { - diagnostics = diagnostics + ":" + ExceptionUtils.getStackTrace(t); + diagnostics = + "Node: " + InetAddress.getLocalHost() + " : " + diagnostics + ":" + ExceptionUtils.getStackTrace(t); } if (isKilled) { tezEvents.add(new TezEvent(new TaskAttemptKilledEvent(diagnostics), @@ -459,10 +491,10 @@ public synchronized boolean taskFailed(TezTaskAttemptID taskAttemptID, } @Override - public boolean taskKilled(TezTaskAttemptID taskAttemptID, Throwable t, String diagnostics, + public boolean taskKilled(TezTaskAttemptID taskAttemptId, Throwable t, String diagnostics, EventMetaData srcMeta) throws IOException, TezException { if(!isShuttingDown()) { - return currentCallable.taskTerminated(taskAttemptID, true, null, t, diagnostics, srcMeta); + return currentCallable.taskTerminated(taskAttemptId, true, null, t, diagnostics, srcMeta); } return false; } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskRunner2Callable.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskRunner2Callable.java index b39af69c40..e6a74321f1 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskRunner2Callable.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskRunner2Callable.java @@ -18,11 +18,16 @@ import java.security.PrivilegedExceptionAction; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.statistics.IOStatisticsContext; +import org.apache.hadoop.fs.statistics.IOStatisticsLogging; import org.apache.hadoop.security.UserGroupInformation; import org.apache.tez.common.CallableWithNdc; +import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezUtilsInternal; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.runtime.LogicalIOProcessorRuntimeTask; +import org.apache.tez.runtime.api.impl.TezUmbilical; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,10 +50,16 @@ public class TaskRunner2Callable extends CallableWithNdc outputContext.trapEvents(new TezTrapEventHandler(outputContext, + this.tezUmbilical))); task.cleanup(); } } diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java index bae7f522d3..ed14bd880c 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezChild.java @@ -49,8 +49,10 @@ import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; +import org.apache.log4j.helpers.ThreadLocalMap; import org.apache.tez.common.ContainerContext; import org.apache.tez.common.ContainerTask; +import org.apache.tez.common.ReflectionUtils; import 
org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezExecutors; import org.apache.tez.common.TezLocalResource; @@ -61,9 +63,9 @@ import org.apache.tez.common.security.JobTokenIdentifier; import org.apache.tez.common.security.TokenCache; import org.apache.tez.dag.api.TezConfiguration; -import org.apache.tez.dag.api.TezConstants; import org.apache.tez.dag.api.TezException; import org.apache.tez.dag.api.records.DAGProtos; +import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.dag.utils.RelocalizationUtils; import org.apache.tez.hadoop.shim.HadoopShim; @@ -71,12 +73,18 @@ import org.apache.tez.runtime.api.ExecutionContext; import org.apache.tez.runtime.api.impl.ExecutionContextImpl; import org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl; +import org.apache.tez.runtime.hook.TezTaskAttemptHook; import org.apache.tez.runtime.internals.api.TaskReporterInterface; +import org.apache.tez.util.LoggingUtils; + +import org.apache.tez.util.TezRuntimeShutdownHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Function; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; +import org.apache.tez.common.TezClassLoader; + import com.google.common.collect.HashMultimap; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; @@ -123,6 +131,7 @@ public class TezChild { private TezVertexID lastVertexID; private final HadoopShim hadoopShim; private final TezExecutors sharedExecutor; + private ThreadLocalMap mdcContext; public TezChild(Configuration conf, String host, int port, String containerIdentifier, String tokenIdentifier, int appAttemptNumber, String workingDir, String[] localDirs, @@ -131,6 +140,7 @@ public TezChild(Configuration conf, String host, int port, String containerIdent ExecutionContext executionContext, Credentials credentials, long memAvailable, String user, TezTaskUmbilicalProtocol umbilical, boolean updateSysCounters, HadoopShim hadoopShim) throws IOException, InterruptedException { + this.mdcContext = LoggingUtils.setupLog4j(); this.defaultConf = conf; this.containerIdString = containerIdentifier; this.appAttemptNumber = appAttemptNumber; @@ -170,7 +180,7 @@ public TezChild(Configuration conf, String host, int port, String containerIdent if (LOG.isDebugEnabled()) { LOG.debug("Executing with tokens:"); for (Token token : credentials.getAllTokens()) { - LOG.debug("",token); + LOG.debug("{}", token); } } @@ -198,6 +208,7 @@ public TezTaskUmbilicalProtocol run() throws Exception { this.umbilical = umbilical; ownUmbilical = false; } + TezCommonUtils.logCredentials(LOG, credentials, "tezChildInit"); } public ContainerExecutionResult run() throws IOException, InterruptedException, TezException { @@ -213,7 +224,7 @@ public ContainerExecutionResult run() throws IOException, InterruptedException, while (!executor.isTerminated() && !isShutdown.get()) { if (taskCount > 0) { - TezUtilsInternal.updateLoggers(""); + TezUtilsInternal.updateLoggers(defaultConf, "", LoggingUtils.getPatternForTask(defaultConf)); } ListenableFuture getTaskFuture = executor.submit(containerReporter); boolean error = false; @@ -237,13 +248,28 @@ public ContainerExecutionResult run() throws IOException, InterruptedException, shutdown(); } } + + TezCommonUtils.logCredentials(LOG, containerTask.getCredentials(), "containerTask"); if (containerTask.shouldDie()) { LOG.info("ContainerTask returned shouldDie=true for container {}, 
Exiting", containerIdString); shutdown(); return new ContainerExecutionResult(ContainerExecutionResult.ExitStatus.SUCCESS, null, "Asked to die by the AM"); } else { - String loggerAddend = containerTask.getTaskSpec().getTaskAttemptID().toString(); + TezTaskAttemptID attemptId = containerTask.getTaskSpec().getTaskAttemptID(); + Configuration taskConf; + if (containerTask.getTaskSpec().getTaskConf() != null) { + Configuration copy = new Configuration(defaultConf); + TezTaskRunner2.mergeTaskSpecConfToConf(containerTask.getTaskSpec(), copy); + taskConf = copy; + LoggingUtils.initLoggingContext(mdcContext, copy, attemptId.getTaskID().getVertexID().getDAGID().toString(), + attemptId.toString()); + } else { + taskConf = defaultConf; + LoggingUtils.initLoggingContext(mdcContext, defaultConf, + attemptId.getTaskID().getVertexID().getDAGID().toString(), attemptId.toString()); + } + String loggerAddend = attemptId.toString(); taskCount++; String timeStamp = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(Calendar.getInstance().getTime()); System.err.println(timeStamp + " Starting to run new task attempt: " + @@ -252,10 +278,12 @@ public ContainerExecutionResult run() throws IOException, InterruptedException, containerTask.getTaskSpec().getTaskAttemptID().toString()); TezUtilsInternal.setHadoopCallerContext(hadoopShim, containerTask.getTaskSpec().getTaskAttemptID()); - TezUtilsInternal.updateLoggers(loggerAddend); + TezUtilsInternal.updateLoggers(defaultConf, loggerAddend, LoggingUtils.getPatternForTask(defaultConf)); + FileSystem.clearStatistics(); childUGI = handleNewTaskCredentials(containerTask, childUGI); + TezCommonUtils.logCredentials(LOG, childUGI.getCredentials(), "taskChildUGI"); handleNewTaskLocalResources(containerTask, childUGI); cleanupOnTaskChanged(containerTask); @@ -265,7 +293,15 @@ public ContainerExecutionResult run() throws IOException, InterruptedException, serviceConsumerMetadata, serviceProviderEnvMap, startedInputsMap, taskReporter, executor, objectRegistry, pid, executionContext, memAvailable, updateSysCounters, hadoopShim, sharedExecutor); + boolean shouldDie; + final String[] hookClasses = taskConf + .getStrings(TezConfiguration.TEZ_TASK_ATTEMPT_HOOKS, new String[0]); + final TezTaskAttemptHook[] hooks = new TezTaskAttemptHook[hookClasses.length]; + for (int i = 0; i < hooks.length; i++) { + hooks[i] = ReflectionUtils.createClazzInstance(hookClasses[i]); + hooks[i].start(attemptId, taskConf); + } try { TaskRunner2Result result = taskRunner.run(); LOG.info("TaskRunner2Result: {}", result); @@ -284,6 +320,9 @@ public ContainerExecutionResult run() throws IOException, InterruptedException, e, "TaskExecutionFailure: " + e.getMessage()); } } finally { + for (TezTaskAttemptHook hook : hooks) { + hook.stop(); + } FileSystem.closeAllForUGI(childUGI); } } @@ -332,9 +371,7 @@ private void handleNewTaskLocalResources(ContainerTask containerTask, UserGroupInformation ugi) throws IOException, TezException { final Map additionalResources = containerTask.getAdditionalResources(); - if (LOG.isDebugEnabled()) { - LOG.debug("Additional Resources added to container: " + additionalResources); - } + LOG.debug("Additional Resources added to container: {}", additionalResources); if (additionalResources != null && !additionalResources.isEmpty()) { LOG.info("Localizing additional local resources for Task : " + additionalResources); @@ -369,13 +406,12 @@ public URI apply(TezLocalResource input) { private void cleanupOnTaskChanged(ContainerTask containerTask) { 
Preconditions.checkState(!containerTask.shouldDie()); Preconditions.checkState(containerTask.getTaskSpec() != null); - TezVertexID newVertexID = containerTask.getTaskSpec().getTaskAttemptID().getTaskID() - .getVertexID(); + TezVertexID newVertexID = containerTask.getTaskSpec().getTaskAttemptID().getVertexID(); if (lastVertexID != null) { if (!lastVertexID.equals(newVertexID)) { objectRegistry.clearCache(ObjectRegistryImpl.ObjectLifeCycle.VERTEX); } - if (!lastVertexID.getDAGId().equals(newVertexID.getDAGId())) { + if (!lastVertexID.getDAGID().equals(newVertexID.getDAGID())) { objectRegistry.clearCache(ObjectRegistryImpl.ObjectLifeCycle.DAG); startedInputsMap = HashMultimap.create(); } @@ -389,8 +425,10 @@ public void shutdown() { LOG.info("Shutting down container {}", containerIdString); // It's possible that there's pending tasks on the executor. Those should be cancelled. List pendingRunnables = executor.shutdownNow(); + LOG.info("There are {} runnables in shared executor, cancelling those...", pendingRunnables.size()); for (Runnable r : pendingRunnables) { - LOG.info("Cancelling pending runnables during TezChild shutdown for containerId={}", containerIdString); + LOG.info("Cancelling pending runnable ({}) during TezChild shutdown for containerId={}", r.hashCode(), + containerIdString); ((FutureTask)r).cancel(false); } if (taskReporter != null) { @@ -400,6 +438,9 @@ public void shutdown() { RPC.stopProxy(umbilical); } } + + TezRuntimeShutdownHandler.shutdown(); + LOG.info("TezChild shutdown finished"); } public static class ContainerExecutionResult { @@ -477,7 +518,7 @@ public static TezChild newTezChild(Configuration conf, String host, int port, St } public static void main(String[] args) throws IOException, InterruptedException, TezException { - + TezClassLoader.setupTezClassLoader(); final Configuration defaultConf = new Configuration(); Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler()); @@ -521,7 +562,8 @@ public static void main(String[] args) throws IOException, InterruptedException, System.getenv(), pid, new ExecutionContextImpl(System.getenv(Environment.NM_HOST.name())), credentials, Runtime.getRuntime().maxMemory(), System .getenv(ApplicationConstants.Environment.USER.toString()), null, true, hadoopShim); - tezChild.run(); + ContainerExecutionResult result = tezChild.run(); + LOG.info("TezChild is about to exit from main(), run() returned result: {}", result.toString()); } private void handleError(Throwable t) { diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner2.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner2.java index 306f2a77cd..2f1be9c000 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner2.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTaskRunner2.java @@ -29,7 +29,9 @@ import java.util.concurrent.locks.ReentrantLock; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; + +import org.apache.hadoop.fs.ClusterStorageCapacityExceededException; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Multimap; import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.hadoop.conf.Configuration; @@ -89,7 +91,7 @@ public class TezTaskRunner2 { // TaskRunnerCallable, a failure to heartbeat, or a signalFatalError on the context. 
private volatile Throwable firstException; private volatile EventMetaData exceptionSourceInfo; - private volatile TaskFailureType firstTaskFailureType; + volatile TaskFailureType firstTaskFailureType; private final AtomicBoolean errorReporterToAm = new AtomicBoolean(false); private volatile boolean oobSignalErrorInProgress = false; @@ -104,7 +106,7 @@ public class TezTaskRunner2 { // The callable which is being used to execute the task. private volatile TaskRunner2Callable taskRunnerCallable; - // This instance is set only if the runner was not configured explicity and will be shutdown + // This instance is set only if the runner was not configured explicitly and will be shutdown // when this task is finished. private final TezSharedExecutor localExecutor; @@ -140,18 +142,22 @@ public TezTaskRunner2(Configuration tezConf, UserGroupInformation ugi, String[] this.umbilicalAndErrorHandler = new UmbilicalAndErrorHandler(); this.hadoopShim = hadoopShim; this.taskConf = new Configuration(tezConf); + mergeTaskSpecConfToConf(taskSpec, taskConf); + localExecutor = sharedExecutor == null ? new TezSharedExecutor(tezConf) : null; + this.task = new LogicalIOProcessorRuntimeTask(taskSpec, appAttemptNumber, taskConf, localDirs, + umbilicalAndErrorHandler, serviceConsumerMetadata, serviceProviderEnvMap, startedInputsMap, + objectRegistry, pid, executionContext, memAvailable, updateSysCounters, hadoopShim, + sharedExecutor == null ? localExecutor : sharedExecutor); + } + + static void mergeTaskSpecConfToConf(TaskSpec taskSpec, Configuration conf) { if (taskSpec.getTaskConf() != null) { Iterator> iter = taskSpec.getTaskConf().iterator(); while (iter.hasNext()) { Entry entry = iter.next(); - taskConf.set(entry.getKey(), entry.getValue()); + conf.set(entry.getKey(), entry.getValue()); } } - localExecutor = sharedExecutor == null ? new TezSharedExecutor(tezConf) : null; - this.task = new LogicalIOProcessorRuntimeTask(taskSpec, appAttemptNumber, taskConf, localDirs, - umbilicalAndErrorHandler, serviceConsumerMetadata, serviceProviderEnvMap, startedInputsMap, - objectRegistry, pid, executionContext, memAvailable, updateSysCounters, hadoopShim, - sharedExecutor == null ? localExecutor : sharedExecutor); } /** @@ -177,7 +183,8 @@ public TaskRunner2Result run() { // Safe to do this within a synchronized block because we're providing // the handler on which the Reporter will communicate back. Assuming // the register call doesn't end up hanging. - taskRunnerCallable = new TaskRunner2Callable(task, ugi); + taskRunnerCallable = new TaskRunner2Callable(task, ugi, + umbilicalAndErrorHandler); taskReporter.registerTask(task, umbilicalAndErrorHandler); future = executor.submit(taskRunnerCallable); } @@ -198,7 +205,7 @@ public TaskRunner2Result run() { synchronized (this) { if (isRunningState()) { trySettingEndReason(EndReason.TASK_ERROR); - registerFirstException(TaskFailureType.NON_FATAL, e, null); + registerFirstException(getTaskFailureType(e), e, null); LOG.warn("Exception from RunnerCallable", e); } } @@ -291,7 +298,7 @@ public TaskRunner2Result run() { // It's possible for the task to actually complete, and an alternate signal such as killTask/killContainer // come in before the future has been processed by this thread. That condition is not handled - and - // the result of the execution will be determind by the thread order. + // the result of the execution will be determined by the thread order. 
@VisibleForTesting void processCallableResult(TaskRunner2CallableResult executionResult) { if (executionResult != null) { @@ -299,7 +306,7 @@ void processCallableResult(TaskRunner2CallableResult executionResult) { if (isRunningState()) { if (executionResult.error != null) { trySettingEndReason(EndReason.TASK_ERROR); - registerFirstException(TaskFailureType.NON_FATAL, executionResult.error, null); + registerFirstException(getTaskFailureType(executionResult.error), executionResult.error, null); } else { trySettingEndReason(EndReason.SUCCESS); taskComplete.set(true); @@ -578,4 +585,13 @@ private void logAborting(String abortReason) { LOG.info("Attempting to abort {} due to an invocation of {}", task.getTaskAttemptID(), abortReason); } + + private TaskFailureType getTaskFailureType(Throwable e) { + boolean hasClusterStorageCapacityExceededException = + ExceptionUtils.indexOfType(e, ClusterStorageCapacityExceededException.class) != -1; + if (hasClusterStorageCapacityExceededException) { + return TaskFailureType.FATAL; + } + return TaskFailureType.NON_FATAL; + } } \ No newline at end of file diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTrapEventHandler.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTrapEventHandler.java new file mode 100644 index 0000000000..b35dbb0162 --- /dev/null +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TezTrapEventHandler.java @@ -0,0 +1,92 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.tez.runtime.task; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.tez.runtime.api.OutputContext; +import org.apache.tez.runtime.api.impl.TezEvent; +import org.apache.tez.runtime.api.impl.TezUmbilical; +import org.apache.tez.runtime.internals.api.TezTrapEvent; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +/** + * Class that handles the events after the trap has been activated. At + * this point, events of certain types should no longer be sent, and it is + * a bug to do so. If such events arrive here, the task has most likely + * failed and will be restarted. + */ +public class TezTrapEventHandler implements EventHandler { + /** + * logger. + */ + private static final Logger + LOG = LoggerFactory.getLogger(TezTrapEventHandler.class); + + /** + * Output context that will report the events. + */ + private final OutputContext outputContext; + + /** + * Protocol to send the events. + */ + private final TezUmbilical tezUmbilical; + + /** + * @param output context that will report the events. + * @param umbilical used to send the events to the AM. + */ + TezTrapEventHandler(final OutputContext output, + final TezUmbilical umbilical) { + this.outputContext = output; + this.tezUmbilical = umbilical; + } + + /** + * Decide what to do with the events.
+ * @param tezTrapEvent event holding the tez events. + */ + @Override + public final void handle(final TezTrapEvent tezTrapEvent) { + Preconditions.checkArgument(tezTrapEvent.getTezEvents() != null); + List tezEvents = new ArrayList( + tezTrapEvent.getTezEvents().size()); + for (TezEvent tezEvent: tezTrapEvent.getTezEvents()) { + switch (tezEvent.getEventType()) { + case COMPOSITE_DATA_MOVEMENT_EVENT: + case DATA_MOVEMENT_EVENT: + String errorMsg = "Some events won't be sent to the AM because all" + + " the events should have been sent at this point. Most likely" + + " this indicates a bug. " + + " event:" + tezEvent.toString(); + Throwable throwable = new Throwable(errorMsg); + LOG.error(errorMsg, throwable); + break; + default: + LOG.info("Event of type " + tezEvent.getEventType() + " will be sent" + + " to the AM after the task has been closed."); + tezEvents.add(tezEvent); + } + } + tezUmbilical.addEvents(tezEvents); + } +} \ No newline at end of file diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/ThreadDumpTaskAttemptHook.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/ThreadDumpTaskAttemptHook.java new file mode 100644 index 0000000000..dd41cee9d2 --- /dev/null +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/ThreadDumpTaskAttemptHook.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.runtime.task; + +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.runtime.TezThreadDumpHelper; +import org.apache.tez.runtime.hook.TezTaskAttemptHook; + +/** + * A task attempt hook which dumps thread information periodically.
+ */ +public class ThreadDumpTaskAttemptHook implements TezTaskAttemptHook { + private TezThreadDumpHelper helper; + + @Override + public void start(TezTaskAttemptID id, Configuration conf) { + helper = TezThreadDumpHelper.getInstance(conf).start(id.toString()); + } + + @Override + public void stop() { + helper.stop(); + } +} diff --git a/tez-runtime-internals/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-runtime-internals/src/main/javadoc/resources/META-INF/LICENSE similarity index 100% rename from tez-runtime-internals/src/main/javadoc/resources/META-INF/LICENSE.txt rename to tez-runtime-internals/src/main/javadoc/resources/META-INF/LICENSE diff --git a/tez-runtime-internals/src/main/javadoc/resources/META-INF/NOTICE b/tez-runtime-internals/src/main/javadoc/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-runtime-internals/src/main/javadoc/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-runtime-internals/src/main/javadoc/resources/META-INF/NOTICE.txt b/tez-runtime-internals/src/main/javadoc/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-runtime-internals/src/main/javadoc/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-runtime-internals/src/main/resources/META-INF/LICENSE.txt b/tez-runtime-internals/src/main/resources/META-INF/LICENSE similarity index 100% rename from tez-runtime-internals/src/main/resources/META-INF/LICENSE.txt rename to tez-runtime-internals/src/main/resources/META-INF/LICENSE diff --git a/tez-runtime-internals/src/main/resources/META-INF/NOTICE b/tez-runtime-internals/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-runtime-internals/src/main/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-runtime-internals/src/main/resources/META-INF/NOTICE.txt b/tez-runtime-internals/src/main/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-runtime-internals/src/main/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
- diff --git a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestInputReadyTracker.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestInputReadyTracker.java index 18463541fa..e10c83f9a5 100644 --- a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestInputReadyTracker.java +++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestInputReadyTracker.java @@ -44,7 +44,7 @@ public class TestInputReadyTracker { - private static final long SLEEP_TIME = 2000l; + private static final long SLEEP_TIME = 200l; @Test(timeout = 20000) public void testWithoutGrouping1() throws InterruptedException { diff --git a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java index c1bb3a13c7..1524cac357 100644 --- a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java +++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/TestLogicalIOProcessorRuntimeTask.java @@ -22,14 +22,24 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.anyList; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.only; +import static org.mockito.Mockito.verify; +import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.tez.common.TezExecutors; import org.apache.tez.common.TezSharedExecutor; import org.apache.tez.dag.api.InputDescriptor; import org.apache.tez.dag.api.OutputDescriptor; @@ -40,29 +50,36 @@ import org.apache.tez.dag.records.TezTaskID; import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.hadoop.shim.DefaultHadoopShim; +import org.apache.tez.hadoop.shim.HadoopShim; import org.apache.tez.runtime.api.AbstractLogicalIOProcessor; import org.apache.tez.runtime.api.AbstractLogicalInput; import org.apache.tez.runtime.api.AbstractLogicalOutput; import org.apache.tez.runtime.api.Event; +import org.apache.tez.runtime.api.ExecutionContext; import org.apache.tez.runtime.api.LogicalInput; import org.apache.tez.runtime.api.LogicalOutput; +import org.apache.tez.runtime.api.ObjectRegistry; import org.apache.tez.runtime.api.Reader; import org.apache.tez.runtime.api.InputContext; import org.apache.tez.runtime.api.OutputContext; import org.apache.tez.runtime.api.ProcessorContext; import org.apache.tez.runtime.api.Writer; +import org.apache.tez.runtime.api.events.CompositeDataMovementEvent; import org.apache.tez.runtime.api.impl.ExecutionContextImpl; import org.apache.tez.runtime.api.impl.InputSpec; import org.apache.tez.runtime.api.impl.OutputSpec; import org.apache.tez.runtime.api.impl.TaskSpec; +import org.apache.tez.runtime.api.impl.TezEvent; import org.apache.tez.runtime.api.impl.TezUmbilical; import org.apache.tez.runtime.common.resources.ScalingAllocator; +import org.apache.tez.runtime.task.TaskRunner2Callable; import org.junit.Test; import com.google.common.collect.HashMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; + public class TestLogicalIOProcessorRuntimeTask { @Test(timeout = 5000) 
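Editor's aside, not part of the patch: the Mockito imports added above (mock, verify, only, never, anyList) support the new tests further down, which assert that no events leak through the umbilical once a task has failed. A self-contained sketch of that interaction-verification pattern, using an illustrative stand-in interface rather than the real TezUmbilical:

    import static org.mockito.Mockito.anyList;
    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.never;
    import static org.mockito.Mockito.only;
    import static org.mockito.Mockito.verify;

    import java.util.Collections;
    import java.util.List;

    class MockitoVerificationSketch {
      // Illustrative stand-in; the real tests mock org.apache.tez.runtime.api.impl.TezUmbilical.
      interface Umbilical {
        void addEvents(List<?> events);
      }

      void demo() {
        Umbilical called = mock(Umbilical.class);
        called.addEvents(Collections.emptyList());
        // only(): addEvents(...) was the one and only interaction with this mock
        verify(called, only()).addEvents(Collections.emptyList());

        Umbilical untouched = mock(Umbilical.class);
        // never(): fails the test if addEvents was invoked with any List argument
        verify(untouched, never()).addEvents(anyList());
      }
    }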
@@ -77,10 +94,14 @@ public void testAutoStart() throws Exception { ScalingAllocator.class.getName()); TezTaskAttemptID taId1 = createTaskAttemptID(vertexId, 1); - TaskSpec task1 = createTaskSpec(taId1, "dag1", "vertex1", 30); + TaskSpec task1 = createTaskSpec(taId1, "dag1", + "vertex1", 30, TestProcessor.class.getName(), + TestOutput.class.getName()); TezTaskAttemptID taId2 = createTaskAttemptID(vertexId, 2); - TaskSpec task2 = createTaskSpec(taId2, "dag2", "vertex1", 10); + TaskSpec task2 = createTaskSpec(taId2, "dag2", + "vertex1", 10, TestProcessor.class.getName(), + TestOutput.class.getName()); TezSharedExecutor sharedExecutor = new TezSharedExecutor(tezConf); LogicalIOProcessorRuntimeTask lio1 = new LogicalIOProcessorRuntimeTask(task1, 0, tezConf, null, @@ -142,6 +163,83 @@ public void testAutoStart() throws Exception { } + @Test + public void testEventsCantBeSentInCleanup() throws Exception { + TezDAGID dagId = createTezDagId(); + TezVertexID vertexId = createTezVertexId(dagId); + Map serviceConsumerMetadata = new HashMap<>(); + Multimap startedInputsMap = HashMultimap.create(); + TezUmbilical umbilical = mock(TezUmbilical.class); + TezConfiguration tezConf = new TezConfiguration(); + tezConf.set(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ALLOCATOR_CLASS, + ScalingAllocator.class.getName()); + + TezTaskAttemptID taId1 = createTaskAttemptID(vertexId, 1); + TaskSpec task1 = createTaskSpec(taId1, "dag1", "vertex1", 30, + RunExceptionProcessor.class.getName(), + TestOutputWithEvents.class.getName()); + + TezSharedExecutor sharedExecutor = new TezSharedExecutor(tezConf); + LogicalIOProcessorRuntimeTask lio = + new CleanupLogicalIOProcessorRuntimeTask(task1, 0, tezConf, null, + umbilical, serviceConsumerMetadata, new HashMap(), + startedInputsMap, null, "", new ExecutionContextImpl("localhost"), + Runtime.getRuntime().maxMemory(), true, new DefaultHadoopShim(), + sharedExecutor); + + TaskRunner2Callable runner = + new TaskRunner2Callable(lio, UserGroupInformation.getCurrentUser(), umbilical); + + runner.call(); + + // We verify that no events were sent + verify(umbilical, only()).addEvents(Collections. 
emptyList()); + } + + /** + * No events should be sent to the AM if an + * exception happens in the close method of the processor. + */ + @Test + @SuppressWarnings("unchecked") + public void testExceptionHappensInClose() throws Exception { + TezDAGID dagId = createTezDagId(); + TezVertexID vertexId = createTezVertexId(dagId); + Map serviceConsumerMetadata = new HashMap<>(); + Multimap startedInputsMap = HashMultimap.create(); + TezUmbilical umbilical = mock(TezUmbilical.class); + TezConfiguration tezConf = new TezConfiguration(); + tezConf.set(TezConfiguration.TEZ_TASK_SCALE_MEMORY_ALLOCATOR_CLASS, + ScalingAllocator.class.getName()); + + TezTaskAttemptID taId1 = createTaskAttemptID(vertexId, 1); + TaskSpec task1 = createTaskSpec(taId1, "dag1", "vertex1", 30, + CloseExceptionProcessor.class.getName(), + TestOutputWithEvents.class.getName()); + + TezSharedExecutor sharedExecutor = new TezSharedExecutor(tezConf); + LogicalIOProcessorRuntimeTask lio1 = new LogicalIOProcessorRuntimeTask(task1, 0, tezConf, null, + umbilical, serviceConsumerMetadata, new HashMap(), startedInputsMap, null, + "", new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory(), true, + new DefaultHadoopShim(), sharedExecutor); + + try { + lio1.initialize(); + lio1.run(); + + try { + lio1.close(); + fail("RuntimeException should have been thrown"); + } catch (RuntimeException e) { + // No events should be sent through the umbilical protocol + verify(umbilical, never()).addEvents(anyList()); + } + } finally { + sharedExecutor.shutdownNow(); + cleanupAndTest(lio1); + } + } + private void cleanupAndTest(LogicalIOProcessorRuntimeTask lio) throws InterruptedException { ProcessorContext procContext = lio.getProcessorContext(); @@ -175,7 +273,7 @@ private void cleanupAndTest(LogicalIOProcessorRuntimeTask lio) throws Interrupte assertEquals(0, lio.outputSpecs.size()); assertTrue(lio.groupInputSpecs == null || lio.groupInputSpecs.size() == 0); } - + assertEquals(0, lio.inputsMap.size()); assertEquals(0, lio.inputContextMap.size()); assertEquals(0, lio.outputsMap.size()); @@ -190,11 +288,12 @@ private void cleanupAndTest(LogicalIOProcessorRuntimeTask lio) throws Interrupte } private TaskSpec createTaskSpec(TezTaskAttemptID taskAttemptID, - String dagName, String vertexName, int parallelism) { - ProcessorDescriptor processorDesc = createProcessorDescriptor(); + String dagName, String vertexName, int parallelism, + String processorClassname, String outputClassName) { + ProcessorDescriptor processorDesc = createProcessorDescriptor(processorClassname); TaskSpec taskSpec = new TaskSpec(taskAttemptID, dagName, vertexName, parallelism, processorDesc, - createInputSpecList(), createOutputSpecList(), null, null); + createInputSpecList(), createOutputSpecList(outputClassName), null, null); return taskSpec; } @@ -204,14 +303,14 @@ private List createInputSpecList() { return Lists.newArrayList(inputSpec); } - private List createOutputSpecList() { - OutputDescriptor outputtDesc = OutputDescriptor.create(TestOutput.class.getName()); + private List createOutputSpecList(String outputClassName) { + OutputDescriptor outputtDesc = OutputDescriptor.create(outputClassName); OutputSpec outputSpec = new OutputSpec("outedge", outputtDesc, 1); return Lists.newArrayList(outputSpec); } - private ProcessorDescriptor createProcessorDescriptor() { - ProcessorDescriptor desc = ProcessorDescriptor.create(TestProcessor.class.getName()); + private ProcessorDescriptor createProcessorDescriptor(String className) { + ProcessorDescriptor desc
= ProcessorDescriptor.create(className); return desc; } @@ -229,6 +328,31 @@ private TezDAGID createTezDagId() { return TezDAGID.getInstance("2000", 100, 1); } + private static class CleanupLogicalIOProcessorRuntimeTask + extends LogicalIOProcessorRuntimeTask { + CleanupLogicalIOProcessorRuntimeTask(TaskSpec taskSpec, + int appAttemptNumber, Configuration tezConf, String[] localDirs, + TezUmbilical tezUmbilical, + Map serviceConsumerMetadata, + Map envMap, Multimap startedInputsMap, + ObjectRegistry objectRegistry, String pid, + org.apache.tez.runtime.api.ExecutionContext ExecutionContext, + long memAvailable, boolean updateSysCounters, HadoopShim hadoopShim, + TezExecutors sharedExecutor) throws IOException { + super(taskSpec, appAttemptNumber, tezConf, localDirs, tezUmbilical, + serviceConsumerMetadata, envMap, startedInputsMap, objectRegistry, + pid, ExecutionContext, memAvailable, updateSysCounters, hadoopShim, + sharedExecutor); + } + + @Override public void cleanup() throws InterruptedException { + getOutputContexts().forEach(context + -> context.sendEvents(Arrays.asList( + CompositeDataMovementEvent.create(0, 0, null) + ))); + } + } + public static class TestProcessor extends AbstractLogicalIOProcessor { public static volatile int runCount = 0; @@ -248,15 +372,47 @@ public void run(Map inputs, Map out getContext().notifyProgress(); } - @Override - public void handleEvents(List processorEvents) { - - } + @Override + public void handleEvents(List processorEvents) { + } - @Override - public void close() throws Exception { - - } + @Override + public void close() throws Exception { + } + + } + + public static class RunExceptionProcessor + extends TestProcessor { + + public RunExceptionProcessor(ProcessorContext context) { + super(context); + } + + public void run(Map inputs, + Map outputs) + throws Exception { + // This exception is thrown on purpose because we want to test this + throw new RuntimeException(); + } + + @Override + public void close() throws Exception { + // This exception is thrown because this method shouldn't be called + // if run has thrown an exception. + throw new RuntimeException(); + } + } + + public static class CloseExceptionProcessor extends TestProcessor { + public CloseExceptionProcessor(ProcessorContext context) { + super(context); + } + + @Override + public void close() throws Exception { + throw new RuntimeException(); + } } @@ -336,6 +492,22 @@ public void handleEvents(List outputEvents) { public List close() throws Exception { return null; } + } + + public static class TestOutputWithEvents extends TestOutput { + + public static volatile int startCount = 0; + public static volatile int vertexParallelism; + + public TestOutputWithEvents(OutputContext outputContext, int numPhysicalOutputs) { + super(outputContext, numPhysicalOutputs); + } + @Override + public List close() throws Exception { + return Arrays.asList( + CompositeDataMovementEvent.create(0, + 0, null)); + } } } diff --git a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/metrics/TestFileSystemStatisticUpdater.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/metrics/TestFileSystemStatisticUpdater.java new file mode 100644 index 0000000000..b07f811ded --- /dev/null +++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/metrics/TestFileSystemStatisticUpdater.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.runtime.metrics; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.tez.common.counters.FileSystemCounter; +import org.apache.tez.common.counters.TezCounter; +import org.apache.tez.common.counters.TezCounters; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestFileSystemStatisticUpdater { + + private static final Logger LOG = LoggerFactory.getLogger( + TestFileSystemStatisticUpdater.class); + + private static MiniDFSCluster dfsCluster; + + private static final Configuration CONF = new Configuration(); + private static FileSystem remoteFs; + + private static final String TEST_ROOT_DIR = "target" + Path.SEPARATOR + + TestFileSystemStatisticUpdater.class.getName() + "-tmpDir"; + + @BeforeClass + public static void beforeClass() throws Exception { + CONF.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR); + } + + @AfterClass + public static void tearDown() { + if (dfsCluster != null) { + dfsCluster.shutdown(); + dfsCluster = null; + } + } + + @Before + public void setup() throws IOException { + FileSystem.clearStatistics(); + try { + // tear down the whole cluster before each test to completely get rid of file system statistics + if (dfsCluster != null) { + dfsCluster.shutdown(); + } + dfsCluster = new MiniDFSCluster.Builder(CONF).numDataNodes(2).build(); + remoteFs = dfsCluster.getFileSystem(); + } catch (IOException io) { + throw new RuntimeException("problem starting mini dfs cluster", io); + } + } + + @Test + public void basicTest() throws IOException { + TezCounters counters = new TezCounters(); + TaskCounterUpdater updater = new TaskCounterUpdater(counters, CONF, "pid"); + + DFSTestUtil.writeFile(remoteFs, new Path("/tmp/foo/abc.txt"), "xyz"); + + updater.updateCounters(); + LOG.info("Counters (after first update): {}", counters); + assertCounter(counters, FileSystemCounter.OP_MKDIRS, 0); // DFSTestUtil doesn't call separate mkdirs + assertCounter(counters, FileSystemCounter.OP_CREATE, 1); + assertCounter(counters, FileSystemCounter.BYTES_WRITTEN, 3); // "xyz" + assertCounter(counters, FileSystemCounter.WRITE_OPS, 1); + assertCounter(counters, FileSystemCounter.OP_GET_FILE_STATUS, 1); // DFSTestUtil calls fs.exists + assertCounter(counters, FileSystemCounter.OP_CREATE, 1); + + DFSTestUtil.writeFile(remoteFs, new Path("/tmp/foo/abc1.txt"), "xyz"); + + updater.updateCounters(); + LOG.info("Counters (after second update): {}", counters); + assertCounter(counters, FileSystemCounter.OP_CREATE, 2); + assertCounter(counters, FileSystemCounter.BYTES_WRITTEN, 6); // "xyz" has been written twice + assertCounter(counters, FileSystemCounter.WRITE_OPS, 2); + assertCounter(counters, FileSystemCounter.OP_GET_FILE_STATUS, 2); // DFSTestUtil calls fs.exists again + assertCounter(counters, FileSystemCounter.OP_CREATE, 2); + + // Ensure all numbers are reset + 
updater.updateCounters(); + LOG.info("Counters (after third update): {}", counters); + // counter holds its value after clearStatistics + updateCounters + assertCounter(counters, FileSystemCounter.OP_CREATE, 2); + } + + private void assertCounter(TezCounters counters, FileSystemCounter fsCounter, int value) { + TezCounter counter = counters.findCounter(remoteFs.getScheme(), fsCounter); + Assert.assertNotNull(counter); + Assert.assertEquals(value, counter.getValue()); + } +} diff --git a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/metrics/TestTaskCounterUpdater.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/metrics/TestTaskCounterUpdater.java new file mode 100644 index 0000000000..aa782396cb --- /dev/null +++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/metrics/TestTaskCounterUpdater.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.runtime.metrics; + +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.common.counters.TaskCounter; +import org.apache.tez.common.counters.TezCounter; +import org.apache.tez.common.counters.TezCounters; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestTaskCounterUpdater { + + private static final Logger LOG = LoggerFactory.getLogger(TestTaskCounterUpdater.class); + private static final Configuration CONF = new Configuration(); + + @Test + public void basicTest() { + TezCounters counters = new TezCounters(); + TaskCounterUpdater updater = new TaskCounterUpdater(counters, CONF, "pid"); + + updater.updateCounters(); + LOG.info("Counters (after first update): {}", counters); + assertCounter(counters, TaskCounter.GC_TIME_MILLIS); + TezCounter cpuCounter = assertCounter(counters, TaskCounter.CPU_MILLISECONDS); + + long oldVal = cpuCounter.getValue(); + Assert.assertTrue(cpuCounter.getValue() > 0); + + updater.updateCounters(); + LOG.info("Counters (after second update): {}", counters); + Assert.assertTrue("Counter not updated, old=" + oldVal + + ", new=" + cpuCounter.getValue(), cpuCounter.getValue() > oldVal); + } + + private TezCounter assertCounter(TezCounters counters, TaskCounter taskCounter) { + TezCounter counter = counters.findCounter(taskCounter); + Assert.assertNotNull(counter); + return counter; + } +} diff --git a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TaskExecutionTestHelpers.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TaskExecutionTestHelpers.java index 626d178cec..3e6790c6cf 100644 --- a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TaskExecutionTestHelpers.java +++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TaskExecutionTestHelpers.java @@ -21,6 +21,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; @@ -46,13 +47,17 @@ import org.apache.tez.runtime.api.impl.TezEvent; import org.apache.tez.runtime.api.impl.TezHeartbeatRequest; import org.apache.tez.runtime.api.impl.TezHeartbeatResponse; + +import org.junit.Assert; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class TaskExecutionTestHelpers { +public final class TaskExecutionTestHelpers { public static final String HEARTBEAT_EXCEPTION_STRING = "HeartbeatException"; + private TaskExecutionTestHelpers() {} + // Uses static fields for signaling. Ensure only used by one test at a time. 
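The test processor below coordinates with the test thread through a shared lock, condition, and completion flag; the caveat above exists because that state lives in static fields shared across instances. As an aside, a minimal self-contained sketch of the same await/signal handshake, with hypothetical names rather than the helper's actual fields:

```java
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

// Sketch of the await/signal handshake between a test thread and a
// processor: the test blocks until the processor reports completion.
final class CompletionSignal {
  private final ReentrantLock lock = new ReentrantLock();
  private final Condition done = lock.newCondition();
  private boolean completed = false; // guarded by lock

  void signalCompletion() {
    lock.lock();
    try {
      completed = true;
      done.signalAll();
    } finally {
      lock.unlock();
    }
  }

  void awaitCompletion() throws InterruptedException {
    lock.lock();
    try {
      while (!completed) { // a while-loop also guards against spurious wakeups
        done.await();
      }
    } finally {
      lock.unlock();
    }
  }
}
```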
public static class TestProcessor extends AbstractLogicalIOProcessor { @@ -184,9 +189,7 @@ public static void awaitCompletion() throws InterruptedException { LOG.info("Await completion"); processorLock.lock(); try { - if (completed) { - return; - } else { + if (!completed) { completionCondition.await(); } } finally { @@ -295,14 +298,14 @@ public static class TezTaskUmbilicalForTest implements TezTaskUmbilicalProtocol private static final Logger LOG = LoggerFactory.getLogger(TezTaskUmbilicalForTest.class); - private final List requestEvents = new LinkedList(); + private final List requestEvents = new LinkedList<>(); private final ReentrantLock umbilicalLock = new ReentrantLock(); private final Condition eventCondition = umbilicalLock.newCondition(); private boolean pendingEvent = false; private boolean eventEnacted = false; - volatile int getTaskInvocations = 0; + private final AtomicInteger taskInvocations = new AtomicInteger(0); private boolean shouldThrowException = false; private boolean shouldSendDieSignal = false; @@ -395,19 +398,20 @@ public void verifyTaskFailedEvent(String diagStart, String diagContains, TaskFai for (TezEvent event : requestEvents) { if (event.getEvent() instanceof TaskAttemptFailedEvent) { TaskAttemptFailedEvent failedEvent = (TaskAttemptFailedEvent) event.getEvent(); - if (failedEvent.getDiagnostics().startsWith(diagStart)) { + String diagnostics = getDiagnosticsWithoutNodeIp(failedEvent.getDiagnostics()); + if (diagnostics.startsWith(diagStart)) { if (diagContains != null) { - if (failedEvent.getDiagnostics().contains(diagContains)) { + if (diagnostics.contains(diagContains)) { assertEquals(taskFailureType, failedEvent.getTaskFailureType()); return; } else { fail("Diagnostic message does not contain expected message. Found [" + - failedEvent.getDiagnostics() + "], Expected: [" + diagContains + "]"); + diagnostics + "], Expected: [" + diagContains + "]"); } } } else { fail("Diagnostic message does not start with expected message. Found [" + - failedEvent.getDiagnostics() + "], Expected: [" + diagStart + "]"); + diagnostics + "], Expected: [" + diagStart + "]"); } } } @@ -424,18 +428,19 @@ public void verifyTaskKilledEvent(String diagStart, String diagContains) { if (event.getEvent() instanceof TaskAttemptKilledEvent) { TaskAttemptKilledEvent killedEvent = (TaskAttemptKilledEvent) event.getEvent(); - if (killedEvent.getDiagnostics().startsWith(diagStart)) { + String diagnostics = getDiagnosticsWithoutNodeIp(killedEvent.getDiagnostics()); + if (diagnostics.startsWith(diagStart)) { if (diagContains != null) { - if (killedEvent.getDiagnostics().contains(diagContains)) { + if (diagnostics.contains(diagContains)) { return; } else { fail("Diagnostic message does not contain expected message. Found [" + - killedEvent.getDiagnostics() + "], Expected: [" + diagContains + "]"); + diagnostics + "], Expected: [" + diagContains + "]"); } } } else { fail("Diagnostic message does not start with expected message. 
Found [" + - killedEvent.getDiagnostics() + "], Expected: [" + diagStart + "]"); + diagnostics + "], Expected: [" + diagStart + "]"); } } } @@ -461,20 +466,20 @@ public void verifyTaskSuccessEvent() { } @Override - public long getProtocolVersion(String protocol, long clientVersion) throws IOException { + public long getProtocolVersion(String protocol, long clientVersion) { return 0; } @Override public ProtocolSignature getProtocolSignature(String protocol, long clientVersion, - int clientMethodsHash) throws IOException { + int clientMethodsHash) { return null; } @Override public ContainerTask getTask(ContainerContext containerContext) throws IOException { // Return shouldDie = true - getTaskInvocations++; + taskInvocations.incrementAndGet(); return new ContainerTask(null, true, null, null, false); } @@ -511,18 +516,31 @@ public TezHeartbeatResponse heartbeat(TezHeartbeatRequest request) throws IOExce umbilicalLock.unlock(); } } + + public int getTaskInvocations() { + return taskInvocations.get(); + } + } + + private static String getDiagnosticsWithoutNodeIp(String diagnostics) { + String diagnosticsWithoutIP = diagnostics; + if (diagnostics != null && diagnostics.startsWith("Node:")) { + diagnosticsWithoutIP = diagnostics.substring(diagnostics.indexOf(" : ") + 3); + String nodeIp = diagnostics.substring(5, diagnostics.indexOf(" : ")); + Assert.assertFalse(nodeIp.isEmpty()); + } + + return diagnosticsWithoutIP; } @SuppressWarnings("deprecation") public static ContainerId createContainerId(ApplicationId appId) { ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1); - ContainerId containerId = ContainerId.newInstance(appAttemptId, 1); - return containerId; + return ContainerId.newInstance(appAttemptId, 1); } public static TaskReporter createTaskReporter(ApplicationId appId, TezTaskUmbilicalForTest umbilical) { - TaskReporter taskReporter = new TaskReporter(umbilical, 100, 1000, 100, new AtomicLong(0), + return new TaskReporter(umbilical, 100, 1000, 100, new AtomicLong(0), createContainerId(appId).toString()); - return taskReporter; } } diff --git a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestContainerExecution.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestContainerExecution.java index c3c4705c15..a570ab8243 100644 --- a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestContainerExecution.java +++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestContainerExecution.java @@ -51,7 +51,7 @@ public void testGetTaskShouldDie() throws InterruptedException, ExecutionExcepti ListenableFuture getTaskFuture = executor.submit(containerReporter); getTaskFuture.get(); - assertEquals(1, umbilical.getTaskInvocations); + assertEquals(1, umbilical.getTaskInvocations()); } finally { executor.shutdownNow(); diff --git a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTaskExecution2.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTaskExecution2.java index 07b9d33b46..9c26a321ca 100644 --- a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTaskExecution2.java +++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTaskExecution2.java @@ -38,7 +38,9 @@ import java.util.concurrent.locks.ReentrantLock; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; + +import org.apache.hadoop.fs.ClusterStorageCapacityExceededException; +import 
org.apache.hadoop.hdfs.protocol.NSQuotaExceededException; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; import com.google.common.util.concurrent.ListeningExecutorService; @@ -77,6 +79,7 @@ import org.apache.tez.runtime.common.resources.ScalingAllocator; import org.apache.tez.runtime.internals.api.TaskReporterInterface; import org.apache.tez.runtime.task.TaskExecutionTestHelpers.TestProcessor; +import org.apache.tez.runtime.task.TaskRunner2Callable.TaskRunner2CallableResult; import org.junit.AfterClass; import org.junit.Before; import org.junit.Test; @@ -177,7 +180,9 @@ public void testMultipleSuccessfulTasks() throws IOException, InterruptedExcepti assertFalse(TestProcessor.wasAborted()); umbilical.resetTrackedEvents(); TezCounters tezCounters = runtimeTask.getCounters(); - verifySysCounters(tezCounters, 5, 5); + // with TEZ-3331, fs counters are not set if the value is 0 (see FileSystemStatisticUpdater2), so there can be + // a mismatch in task counter count and fs counter count + verifySysCounters(tezCounters, 5, 0); taskRunner = createTaskRunner(appId, umbilical, taskReporter, executor, TestProcessor.CONF_EMPTY, false); @@ -653,11 +658,41 @@ public void testKilledAfterComplete() throws IOException, InterruptedException, } } - private void verifySysCounters(TezCounters tezCounters, int minTaskCounterCount, int minFsCounterCount) { + @Test + public void testClusterStorageCapacityFatalError() throws IOException { + // Try having a ClusterStorageCapacityExceededException, which is nested within several exceptions. + TezTaskRunner2ForTest taskRunner = createTaskRunnerForTest(); + TaskRunner2CallableResult executionResult = new TaskRunner2CallableResult(new Exception( + new IllegalArgumentException(new ClusterStorageCapacityExceededException("cluster capacity blown")))); + taskRunner.processCallableResult(executionResult); + + assertEquals(TaskFailureType.FATAL, taskRunner.getFirstTaskFailureType()); + + // Try having a child class of ClusterStorageCapacityExceededException, which is nested within several exceptions. + taskRunner = createTaskRunnerForTest(); + executionResult = new TaskRunner2CallableResult( + new Exception(new IllegalArgumentException(new NSQuotaExceededException("Namespace quota blown")))); + taskRunner.processCallableResult(executionResult); + + assertEquals(TaskFailureType.FATAL, taskRunner.getFirstTaskFailureType()); + + // Try having a ClusterStorageCapacityExceededException as the first exception (non-nested) + taskRunner = createTaskRunnerForTest(); + executionResult = + new TaskRunner2CallableResult(new ClusterStorageCapacityExceededException("cluster capacity blown")); + taskRunner.processCallableResult(executionResult); + + assertEquals(TaskFailureType.FATAL, taskRunner.getFirstTaskFailureType()); - Preconditions.checkArgument((minTaskCounterCount > 0 && minFsCounterCount > 0) || - (minTaskCounterCount <= 0 && minFsCounterCount <= 0), - "Both targetCounter counts should be postitive or negative. 
A mix is not expected"); + // Try some other exception, for which the failure type should be NON_FATAL + taskRunner = createTaskRunnerForTest(); + executionResult = new TaskRunner2CallableResult(new Exception(new IllegalArgumentException("Generic Exception"))); + taskRunner.processCallableResult(executionResult); + + assertEquals(TaskFailureType.NON_FATAL, taskRunner.getFirstTaskFailureType()); + } + + private void verifySysCounters(TezCounters tezCounters, int minTaskCounterCount, int minFsCounterCount) { int numTaskCounters = 0; int numFsCounters = 0; @@ -675,8 +710,8 @@ private void verifySysCounters(TezCounters tezCounters, int minTaskCounterCount, // If Target <=0, assert counter count is exactly 0 if (minTaskCounterCount <= 0) { - assertEquals(0, numTaskCounters); - assertEquals(0, numFsCounters); + assertEquals(tezCounters.toString(), 0, numTaskCounters); + assertEquals(tezCounters.toString(), 0, numFsCounters); } else { assertTrue(numTaskCounters >= minTaskCounterCount); assertTrue(numFsCounters >= minFsCounterCount); @@ -747,6 +782,11 @@ private TezTaskRunner2 createTaskRunner(ApplicationId appId, processorConf, false, updateSysCounters); } + private TezTaskRunner2ForTest createTaskRunnerForTest() throws IOException { + return (TezTaskRunner2ForTest) createTaskRunner(ApplicationId.newInstance(10000, 1), null, null, null, + TestProcessor.class.getName(), TestProcessor.CONF_EMPTY, true, false); + } + private TezTaskRunner2ForTest createTaskRunnerForTest(ApplicationId appId, TaskExecutionTestHelpers.TezTaskUmbilicalForTest umbilical, TaskReporter taskReporter, @@ -827,6 +867,9 @@ executionContext, memAvailable, updateSysCounters, new DefaultHadoopShim(), sharedExecutor); } + public TaskFailureType getFirstTaskFailureType() { + return firstTaskFailureType; + } @Override @VisibleForTesting diff --git a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTaskReporter.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTaskReporter.java index 04c467a8fc..7ecd74fb72 100644 --- a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTaskReporter.java +++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTaskReporter.java @@ -18,7 +18,8 @@ package org.apache.tez.runtime.task; -import static org.mockito.Matchers.any; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_TASK_LOCAL_FS_WRITE_LIMIT_BYTES; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.isA; @@ -27,7 +28,11 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import java.io.File; +import java.io.IOException; +import java.util.Collections; import java.util.List; +import java.util.Random; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -36,12 +41,21 @@ import com.google.common.collect.Lists; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; import org.apache.tez.common.TezTaskUmbilicalProtocol; import org.apache.tez.common.counters.TezCounters; +import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.runtime.LogicalIOProcessorRuntimeTask; +import org.apache.tez.runtime.RuntimeTask.LocalWriteLimitException;
import org.apache.tez.runtime.api.events.TaskStatusUpdateEvent; +import org.apache.tez.runtime.api.impl.InputSpec; +import org.apache.tez.runtime.api.impl.OutputSpec; +import org.apache.tez.runtime.api.impl.TaskSpec; import org.apache.tez.runtime.api.impl.TaskStatistics; import org.apache.tez.runtime.api.impl.TezEvent; import org.apache.tez.runtime.api.impl.TezHeartbeatRequest; @@ -55,11 +69,15 @@ @SuppressWarnings("rawtypes") public class TestTaskReporter { + private static final File TEST_DIR = + new File(System.getProperty("test.build.data"), TestTaskReporter.class.getName()).getAbsoluteFile(); + @Test(timeout = 10000) public void testContinuousHeartbeatsOnMaxEvents() throws Exception { final Object lock = new Object(); final AtomicBoolean hb2Done = new AtomicBoolean(false); + final int maxEvents = 5; TezTaskUmbilicalProtocol mockUmbilical = mock(TezTaskUmbilicalProtocol.class); doAnswer(new Answer() { @@ -68,7 +86,7 @@ public Object answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); TezHeartbeatRequest request = (TezHeartbeatRequest) args[0]; if (request.getRequestId() == 1 || request.getRequestId() == 2) { - TezHeartbeatResponse response = new TezHeartbeatResponse(createEvents(5)); + TezHeartbeatResponse response = new TezHeartbeatResponse(createEvents(maxEvents)); response.setLastRequestId(request.getRequestId()); return response; } else if (request.getRequestId() == 3) { @@ -83,7 +101,7 @@ public Object answer(InvocationOnMock invocation) throws Throwable { throw new TezUncheckedException("Invalid request id for test: " + request.getRequestId()); } } - }).when(mockUmbilical).heartbeat(any(TezHeartbeatRequest.class)); + }).when(mockUmbilical).heartbeat(any()); TezTaskAttemptID mockTaskAttemptId = mock(TezTaskAttemptID.class); LogicalIOProcessorRuntimeTask mockTask = mock(LogicalIOProcessorRuntimeTask.class); @@ -92,7 +110,7 @@ public Object answer(InvocationOnMock invocation) throws Throwable { // Setup the sleep time to be way higher than the test timeout TaskReporter.HeartbeatCallable heartbeatCallable = - new TaskReporter.HeartbeatCallable(mockTask, mockUmbilical, 100000, 100000, 5, + new TaskReporter.HeartbeatCallable(mockTask, mockUmbilical, 100000, 100000, maxEvents, new AtomicLong(0), "containerIdStr"); @@ -104,10 +122,10 @@ public Object answer(InvocationOnMock invocation) throws Throwable { lock.wait(); } } - verify(mockUmbilical, times(3)).heartbeat(any(TezHeartbeatRequest.class)); - Thread.sleep(2000l); - // Sleep for 2 seconds, less than the callable sleep time. No more invocations. - verify(mockUmbilical, times(3)).heartbeat(any(TezHeartbeatRequest.class)); + verify(mockUmbilical, times(3)).heartbeat(any()); + Thread.sleep(200l); + // Sleep for less than the callable sleep time. No more invocations. 
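The heartbeat test in this hunk stubs the umbilical so that the first two heartbeats each return a full batch of maxEvents events, which forces the reporter to heartbeat again immediately instead of sleeping; a smaller third batch lets it go idle, which the verify calls confirm. A rough sketch of that drain-then-idle heuristic, with illustrative names rather than the actual TaskReporter internals:

```java
import java.util.List;

// Illustrative "drain while full" polling: a full batch suggests a backlog,
// so poll again immediately; a partial batch means we can fall back to the
// regular heartbeat interval.
interface EventSource {
  List<Object> fetchEvents(int maxEvents); // stands in for the umbilical heartbeat
}

final class HeartbeatLoop {
  private final EventSource source;
  private final int maxEvents;
  private final long intervalMs;

  HeartbeatLoop(EventSource source, int maxEvents, long intervalMs) {
    this.source = source;
    this.maxEvents = maxEvents;
    this.intervalMs = intervalMs;
  }

  void heartbeatOnce() throws InterruptedException {
    List<Object> events;
    do {
      events = source.fetchEvents(maxEvents);
      // ... dispatch the received events to the task here ...
    } while (events.size() == maxEvents); // full batch: drain immediately
    Thread.sleep(intervalMs); // backlog drained: wait for the next interval
  }
}
```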
+ verify(mockUmbilical, times(3)).heartbeat(any()); } finally { executor.shutdownNow(); } @@ -217,6 +235,38 @@ public void testStatusUpdateAfterInitializationAndCounterFlag() { } + @Test + public void testLocalFileSystemBytesWrittenLimit() throws IOException { + TaskSpec mockSpec = mock(TaskSpec.class); + when(mockSpec.getInputs()).thenReturn(Collections.singletonList(mock(InputSpec.class))); + when(mockSpec.getOutputs()).thenReturn(Collections.singletonList(mock(OutputSpec.class))); + TezConfiguration tezConf = new TezConfiguration(); + LogicalIOProcessorRuntimeTask lio1 = + new LogicalIOProcessorRuntimeTask(mockSpec, 0, tezConf, null, null, null, null, null, null, "", null, + Runtime.getRuntime().maxMemory(), true, null, null); + + LocalFileSystem localFS = FileSystem.getLocal(tezConf); + FileSystem.clearStatistics(); + Path tmpPath = + new Path(TEST_DIR + "/testLocalFileSystemBytesWrittenLimit" + new Random(System.currentTimeMillis()).nextInt()); + try (FSDataOutputStream out = localFS.create(tmpPath, true)) { + out.write(new byte[1024]); + } + // Check limits with default shouldn't throw exception. + lio1.checkTaskLimits(); + + tezConf.setLong(TEZ_TASK_LOCAL_FS_WRITE_LIMIT_BYTES, 10); + lio1 = new LogicalIOProcessorRuntimeTask(mockSpec, 0, tezConf, null, null, null, null, null, null, "", null, + Runtime.getRuntime().maxMemory(), true, null, null); + + try { + lio1.checkTaskLimits(); + Assert.fail("Expected to throw LocalWriteLimitException"); + } catch (LocalWriteLimitException localWriteLimitException) { + Assert.assertTrue(localWriteLimitException.getMessage().contains("Too much write to local file system")); + } + } + private List createEvents(int numEvents) { List list = Lists.newArrayListWithCapacity(numEvents); for (int i = 0; i < numEvents; i++) { diff --git a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTezTaskRunner2.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTezTaskRunner2.java index 6876df93ec..a6d05beb5f 100644 --- a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTezTaskRunner2.java +++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TestTezTaskRunner2.java @@ -25,9 +25,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.tez.common.TezExecutors; import org.apache.tez.common.TezSharedExecutor; import org.apache.tez.dag.api.ProcessorDescriptor; +import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.dag.records.TezTaskAttemptID; +import org.apache.tez.dag.records.TezTaskID; +import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.hadoop.shim.DefaultHadoopShim; import org.apache.tez.runtime.api.impl.InputSpec; import org.apache.tez.runtime.api.impl.OutputSpec; @@ -50,8 +55,13 @@ public void testTaskConfUsage() throws Exception { List inputSpecList = new ArrayList<>(); List outputSpecList = new ArrayList<>(); - TaskSpec taskSpec = new TaskSpec("dagName", "vertexName", 1, mock(ProcessorDescriptor.class), - inputSpecList, outputSpecList, null, taskConf); + TaskSpec taskSpec = + new TaskSpec( + TezTaskAttemptID.getInstance( + TezTaskID.getInstance(TezVertexID + .getInstance(TezDAGID.getInstance(ApplicationId.fromString("application_1_1"), 0), 0), 0), 0), + "dagName", "vertexName", 1, mock(ProcessorDescriptor.class), inputSpecList, + outputSpecList, null, taskConf); TezExecutors sharedExecutor = new TezSharedExecutor(conf); 
TezTaskRunner2 taskRunner2 = new TezTaskRunner2(conf, mock(UserGroupInformation.class), localDirs, taskSpec, 1, null, null, null, mock(TaskReporter.class), null, null, "pid", diff --git a/tez-runtime-internals/src/test/resources/META-INF/LICENSE.txt b/tez-runtime-internals/src/test/resources/META-INF/LICENSE similarity index 100% rename from tez-runtime-internals/src/test/resources/META-INF/LICENSE.txt rename to tez-runtime-internals/src/test/resources/META-INF/LICENSE diff --git a/tez-runtime-internals/src/test/resources/META-INF/NOTICE b/tez-runtime-internals/src/test/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-runtime-internals/src/test/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-runtime-internals/src/test/resources/META-INF/NOTICE.txt b/tez-runtime-internals/src/test/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-runtime-internals/src/test/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-runtime-library/findbugs-exclude.xml b/tez-runtime-library/findbugs-exclude.xml index 24fe93e2e2..caa46c5527 100644 --- a/tez-runtime-library/findbugs-exclude.xml +++ b/tez-runtime-library/findbugs-exclude.xml @@ -207,4 +207,17 @@ + + + + + + + + + + + + + diff --git a/tez-runtime-library/pom.xml b/tez-runtime-library/pom.xml index b3a19a3668..587f8fb9fe 100644 --- a/tez-runtime-library/pom.xml +++ b/tez-runtime-library/pom.xml @@ -20,19 +20,27 @@ org.apache.tez tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-runtime-library + + false + + org.roaringbitmap RoaringBitmap - com.ning + org.asynchttpclient async-http-client + + io.netty + netty-all + org.apache.tez tez-api @@ -98,7 +106,7 @@ org.mockito - mockito-all + mockito-core test @@ -110,30 +118,26 @@ apache-rat-plugin - org.apache.hadoop - hadoop-maven-plugins + com.github.os72 + protoc-jar-maven-plugin - compile-protoc generate-sources - protoc + run - ${protobuf.version} + com.google.protobuf:protoc:${protobuf.version} ${protoc.path} - - ${basedir}/src/main/proto - - - ${basedir}/src/main/proto - - ShufflePayloads.proto - CartesianProductPayload.proto - FairShufflePayloads.proto - - - ${project.build.directory}/generated-sources/java + none + + ${basedir}/src/main/proto + + + + ${project.build.directory}/generated-sources/java + + diff --git a/tez-runtime-library/src/main/java/org/apache/hadoop/io/FileChunk.java b/tez-runtime-library/src/main/java/org/apache/hadoop/io/FileChunk.java index e7a5c244dc..714bbcd484 100644 --- a/tez-runtime-library/src/main/java/org/apache/hadoop/io/FileChunk.java +++ b/tez-runtime-library/src/main/java/org/apache/hadoop/io/FileChunk.java @@ -18,7 +18,8 @@ package org.apache.hadoop.io; -import com.google.common.base.Preconditions; +import java.util.Objects; + import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.fs.Path; import org.apache.tez.runtime.library.common.InputAttemptIdentifier; @@ -40,7 +41,7 @@ public FileChunk(Path path, long offset, long length, boolean isLocalFile, this.isLocalFile = isLocalFile; this.identifier = identifier; if (isLocalFile) { - Preconditions.checkNotNull(identifier); + 
Objects.requireNonNull(identifier); } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/common/TezRuntimeFrameworkConfigs.java b/tez-runtime-library/src/main/java/org/apache/tez/common/TezRuntimeFrameworkConfigs.java index 62bc232b88..61c0fcd317 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/common/TezRuntimeFrameworkConfigs.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/common/TezRuntimeFrameworkConfigs.java @@ -27,7 +27,7 @@ * Configuration parameters which are internal to the Inputs and Outputs which exist in the Runtime */ @Private -public class TezRuntimeFrameworkConfigs { +public final class TezRuntimeFrameworkConfigs { private static final String TEZ_RUNTIME_FRAMEWORK_PREFIX = "tez.runtime.framework."; @@ -41,4 +41,6 @@ public class TezRuntimeFrameworkConfigs { public static final String TEZ_RUNTIME_METRICS_SESSION_ID = TEZ_RUNTIME_FRAMEWORK_PREFIX + "metrics.session.id"; public static final String TEZ_RUNTIME_METRICS_SESSION_ID_DEFAULT = ""; + + private TezRuntimeFrameworkConfigs() {} } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/edgemanager/SilentEdgeManager.java b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/edgemanager/SilentEdgeManager.java new file mode 100644 index 0000000000..db6bb5affe --- /dev/null +++ b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/edgemanager/SilentEdgeManager.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.library.edgemanager; + +import org.apache.tez.dag.api.EdgeManagerPlugin; +import org.apache.tez.dag.api.EdgeManagerPluginContext; +import org.apache.tez.runtime.api.events.DataMovementEvent; +import org.apache.tez.runtime.api.events.InputReadErrorEvent; + +import java.util.List; +import java.util.Map; + +/** + * A dummy edge manager used in scenarios where application will depend on + * the direct connection between containers/tasks to handle all data communications, + * including both routing and actual data transfers. + */ + +public class SilentEdgeManager extends EdgeManagerPlugin { + + /** + * Create an instance of the EdgeManagerPlugin. Classes extending this to + * create a EdgeManagerPlugin, must provide the same constructor so that Tez + * can create an instance of the class at runtime. + * + * @param context the context within which this EdgeManagerPlugin will run. Includes + * information like configuration which the user may have specified + * while setting up the edge. 
+ */ + public SilentEdgeManager(EdgeManagerPluginContext context) { + super(context); + } + + @Override + public void initialize() throws Exception { + + } + + @Override + public int getNumDestinationTaskPhysicalInputs(int destinationTaskIndex) throws Exception { + return 0; + } + + @Override + public int getNumSourceTaskPhysicalOutputs(int sourceTaskIndex) throws Exception { + return 0; + } + + @Override + public void routeDataMovementEventToDestination( + DataMovementEvent event, int sourceTaskIndex, int sourceOutputIndex, + Map> destinationTaskAndInputIndices) throws Exception { + throw new UnsupportedOperationException( + "routeDataMovementEventToDestination not supported for SilentEdgeManager"); + } + + @Override + public void routeInputSourceTaskFailedEventToDestination( + int sourceTaskIndex, Map> destinationTaskAndInputIndices) throws Exception { + throw new UnsupportedOperationException( + "routeInputSourceTaskFailedEventToDestination not supported for SilentEdgeManager"); + } + + @Override + public int getNumDestinationConsumerTasks(int sourceTaskIndex) throws Exception { + return 0; + } + + @Override + public int routeInputErrorEventToSource(InputReadErrorEvent event, int destinationTaskIndex, int destinationFailedInputIndex) throws Exception { + return 0; + } +} diff --git a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/FairShuffleVertexManager.java b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/FairShuffleVertexManager.java index a8b336c56a..af4e5b8b26 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/FairShuffleVertexManager.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/FairShuffleVertexManager.java @@ -18,7 +18,7 @@ package org.apache.tez.dag.library.vertexmanager; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.UnmodifiableIterator; @@ -234,7 +234,9 @@ public long[] estimatePartitionSize() { } else { for (int i = 0; i < numOfPartitions; i++) { estimatedPartitionOutputSize[i] = - MB * getExpectedStatsAtIndex(i); + getExpectedStatsAtIndex(i); + LOG.info("Partition index {} with size {}", i, + estimatedPartitionOutputSize[i]); } } return estimatedPartitionOutputSize; @@ -419,9 +421,12 @@ public void compute() { } Iterator it = iterator(); while(it.hasNext()) { + DestinationTaskInputsProperty property = it.next(); sourceVertexInfo.getDestinationInputsProperties().put( - destinationIndex,it.next()); + destinationIndex, property); destinationIndex++; + LOG.info("Destination Index {}: Input Property {}", + destinationIndex, property); } startNextPartitionsGroup(); } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/InputReadyVertexManager.java b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/InputReadyVertexManager.java index f05cd955e4..33c4a99059 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/InputReadyVertexManager.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/InputReadyVertexManager.java @@ -40,7 +40,7 @@ import org.apache.tez.runtime.api.TaskAttemptIdentifier; import org.apache.tez.runtime.api.events.VertexManagerEvent; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import 
com.google.common.collect.Maps; diff --git a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java index ed27f04b8c..b05c45ad96 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManager.java @@ -18,7 +18,7 @@ package org.apache.tez.dag.library.vertexmanager; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import com.google.protobuf.ByteString; import com.google.protobuf.InvalidProtocolBufferException; diff --git a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManagerBase.java b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManagerBase.java index 967d0ea7a1..1d55c71944 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManagerBase.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/ShuffleVertexManagerBase.java @@ -19,7 +19,7 @@ package org.apache.tez.dag.library.vertexmanager; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -130,7 +130,9 @@ static class SourceVertexInfo { final BitSet finishedTaskSet; int numTasks; int numVMEventsReceived; + // The total uncompressed size long outputSize; + // The uncompressed size of each partition. The size might not be precise int[] statsInMB; EdgeManagerPluginDescriptor newDescriptor; @@ -148,9 +150,14 @@ int getNumTasks() { int getNumCompletedTasks() { return finishedTaskSet.cardinality(); } - int getExpectedStatsInMBAtIndex(int index) { + + BigInteger getExpectedStatsAtIndex(int index) { return (numVMEventsReceived == 0) ? - 0: statsInMB[index] * numTasks / numVMEventsReceived; + BigInteger.ZERO : + BigInteger.valueOf(statsInMB[index]). + multiply(BigInteger.valueOf(numTasks)). + divide(BigInteger.valueOf(numVMEventsReceived)). 
+ multiply(BigInteger.valueOf(MB)); } } @@ -464,12 +471,17 @@ int getCurrentlyKnownStatsAtIndex(int index) { return stats; } - int getExpectedStatsAtIndex(int index) { - int stats = 0; + long getExpectedStatsAtIndex(int index) { + BigInteger stats = BigInteger.ZERO; for(SourceVertexInfo entry : getAllSourceVertexInfo()) { - stats += entry.getExpectedStatsInMBAtIndex(index); + stats = stats.add(entry.getExpectedStatsAtIndex(index)); + } + if (stats.compareTo(BigInteger.valueOf(Long.MAX_VALUE)) > 0) { + LOG.warn("Partition {}'s size {} exceeded Long.MAX_VALUE", index, stats); + return Long.MAX_VALUE; + } else { + return stats.longValue(); } - return stats; } /** @@ -538,8 +550,8 @@ Iterable getAllSourceVertexInfo() { return srcVertexInfo.values(); } - SourceVertexInfo getSourceVertexInfo(String vertextName) { - return srcVertexInfo.get(vertextName); + SourceVertexInfo getSourceVertexInfo(String vertexName) { + return srcVertexInfo.get(vertexName); } Iterable> getBipartiteInfo() { diff --git a/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/VertexManagerWithConcurrentInput.java b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/VertexManagerWithConcurrentInput.java new file mode 100644 index 0000000000..caf5acd588 --- /dev/null +++ b/tez-runtime-library/src/main/java/org/apache/tez/dag/library/vertexmanager/VertexManagerWithConcurrentInput.java @@ -0,0 +1,245 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tez.dag.library.vertexmanager; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.common.TezUtils; +import org.apache.tez.dag.api.EdgeProperty; +import org.apache.tez.dag.api.EdgeProperty.ConcurrentEdgeTriggerType; +import org.apache.tez.dag.api.InputDescriptor; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.api.TezUncheckedException; +import org.apache.tez.dag.api.UserPayload; +import org.apache.tez.dag.api.VertexManagerPlugin; +import org.apache.tez.dag.api.VertexManagerPluginContext; +import org.apache.tez.dag.api.VertexManagerPluginDescriptor; +import org.apache.tez.dag.api.event.VertexState; +import org.apache.tez.dag.api.event.VertexStateUpdate; +import org.apache.tez.runtime.api.Event; +import org.apache.tez.runtime.api.TaskAttemptIdentifier; +import org.apache.tez.runtime.api.events.VertexManagerEvent; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.apache.tez.dag.api.EdgeProperty.SchedulingType.CONCURRENT; + +public class VertexManagerWithConcurrentInput extends VertexManagerPlugin { + + private static final Logger LOG = LoggerFactory.getLogger(VertexManagerWithConcurrentInput.class); + + private final Map srcVerticesConfigured = Maps.newConcurrentMap(); + private int managedTasks; + private AtomicBoolean tasksScheduled = new AtomicBoolean(false); + private AtomicBoolean onVertexStartedDone = new AtomicBoolean(false); + private Configuration vertexConfig; + private String vertexName; + private ConcurrentEdgeTriggerType edgeTriggerType; + private volatile boolean allSrcVerticesConfigured; + + int completedUpstreamTasks; + + public VertexManagerWithConcurrentInput(VertexManagerPluginContext context) { + super(context); + } + + @Override + public void initialize() { + UserPayload userPayload = getContext().getUserPayload(); + if (userPayload == null || userPayload.getPayload() == null || + userPayload.getPayload().limit() == 0) { + throw new TezUncheckedException("Could not initialize VertexManagerWithConcurrentInput" + + " from provided user payload"); + } + managedTasks = getContext().getVertexNumTasks(getContext().getVertexName()); + Map edges = getContext().getInputVertexEdgeProperties(); + for (Map.Entry entry : edges.entrySet()) { + if (!CONCURRENT.equals(entry.getValue().getSchedulingType())) { + throw new TezUncheckedException("All input edges to vertex " + vertexName + + " must be CONCURRENT."); + } + String srcVertex = entry.getKey(); + srcVerticesConfigured.put(srcVertex, false); + getContext().registerForVertexStateUpdates(srcVertex, EnumSet.of(VertexState.CONFIGURED)); + } + + try { + vertexConfig = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); + } catch (IOException e) { + throw new TezUncheckedException(e); + } + edgeTriggerType = ConcurrentEdgeTriggerType.valueOf( + vertexConfig.get(TezConfiguration.TEZ_CONCURRENT_EDGE_TRIGGER_TYPE, + TezConfiguration.TEZ_CONCURRENT_EDGE_TRIGGER_TYPE_DEFAULT)); + if (!ConcurrentEdgeTriggerType.SOURCE_VERTEX_CONFIGURED.equals(edgeTriggerType)) { + // pending TEZ-3999 + throw new TezUncheckedException("Only support SOURCE_VERTEX_CONFIGURED triggering type for now."); + } + LOG.info("VertexManagerWithConcurrentInput initialized with 
edgeTriggerType {}.", edgeTriggerType); + + vertexName = getContext().getVertexName(); + completedUpstreamTasks = 0; + } + + @Override + public synchronized void onVertexStarted(List completions) { + onVertexStartedDone.set(true); + scheduleTasks(); + } + + @Override + public synchronized void onVertexStateUpdated(VertexStateUpdate stateUpdate) { + VertexState state = stateUpdate.getVertexState(); + String fromVertex = stateUpdate.getVertexName(); + if (!srcVerticesConfigured.containsKey(fromVertex)) { + throw new IllegalArgumentException("Not expecting state update from vertex:" + + fromVertex + " in vertex: " + this.vertexName); + } + + if (!VertexState.CONFIGURED.equals(state)) { + throw new IllegalArgumentException("Received incorrect state notification : " + + state + " from vertex: " + fromVertex + " in vertex: " + this.vertexName); + } + + LOG.info("Received configured notification: " + state + " for vertex: " + + fromVertex + " in vertex: " + this.vertexName); + srcVerticesConfigured.put(fromVertex, true); + + // check for source vertices completely configured + boolean checkAllSrcVerticesConfigured = true; + for (Map.Entry entry : srcVerticesConfigured.entrySet()) { + if (!entry.getValue()) { + // vertex not configured + LOG.info("Waiting for vertex {} in vertex {} ", entry.getKey(), this.vertexName); + checkAllSrcVerticesConfigured = false; + break; + } + } + allSrcVerticesConfigured = checkAllSrcVerticesConfigured; + + scheduleTasks(); + } + + @Override + public synchronized void onSourceTaskCompleted(TaskAttemptIdentifier attempt) { + completedUpstreamTasks ++; + LOG.info("Source task attempt {} completion received at vertex {}", attempt, this.vertexName); + } + + @Override + public void onVertexManagerEventReceived(VertexManagerEvent vmEvent) { + } + + @Override + public void onRootVertexInitialized(String inputName, + InputDescriptor inputDescriptor, List events) { + } + + private void scheduleTasks() { + if (!onVertexStartedDone.get()) { + // vertex not started yet + return; + } + if (tasksScheduled.get()) { + // already scheduled + return; + } + + if (!canScheduleTasks()) { + return; + } + + tasksScheduled.compareAndSet(false, true); + List tasksToStart = Lists.newArrayListWithCapacity(managedTasks); + for (int i = 0; i < managedTasks; ++i) { + tasksToStart.add(VertexManagerPluginContext.ScheduleTaskRequest.create(i, null)); + } + + if (!tasksToStart.isEmpty()) { + LOG.info("Starting {} tasks in {}.", tasksToStart.size(), this.vertexName); + getContext().scheduleTasks(tasksToStart); + } + // all tasks scheduled. Can call vertexManagerDone(). + } + + private boolean canScheduleTasks() { + if (edgeTriggerType.equals(ConcurrentEdgeTriggerType.SOURCE_VERTEX_CONFIGURED)) { + return allSrcVerticesConfigured; + } else { + // pending TEZ-3999 + throw new TezUncheckedException("Only support SOURCE_VERTEX_CONFIGURED triggering type for now."); + } + } + + + /** + * Create a {@link VertexManagerPluginDescriptor} builder that can be used to + * configure the plugin. + * + * @param conf + * {@link Configuration} May be modified in place. May be null if the + * configuration parameters are to be set only via code. If + * configuration values may be changed at runtime via a config file + * then pass in a {@link Configuration} that is initialized from a + * config file. The parameters that are not overridden in code will + * be derived from the Configuration object. 
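To make the builder javadoc above concrete, here is a hypothetical wiring of the plugin into a DAG vertex — a sketch that assumes a Vertex whose input edges are all CONCURRENT, not code from this patch:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.library.vertexmanager.VertexManagerWithConcurrentInput;

final class ConcurrentInputExample {
  // Attach the concurrent-input vertex manager to a vertex. Only the
  // SOURCE_VERTEX_CONFIGURED trigger type is supported so far (see TEZ-3999).
  static void attach(Vertex vertex) {
    Configuration conf = new Configuration(false);
    conf.set(TezConfiguration.TEZ_CONCURRENT_EDGE_TRIGGER_TYPE,
        "SOURCE_VERTEX_CONFIGURED");
    vertex.setVertexManagerPlugin(
        VertexManagerWithConcurrentInput.createConfigBuilder(conf).build());
  }
}
```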
+ * @return {@link ConcurrentInputVertexManagerConfigBuilder} + */ + public static ConcurrentInputVertexManagerConfigBuilder createConfigBuilder( + @Nullable Configuration conf) { + return new ConcurrentInputVertexManagerConfigBuilder(conf); + } + + /** + * Helper class to configure VertexManagerWithConcurrentInput + */ + public static final class ConcurrentInputVertexManagerConfigBuilder { + private final Configuration conf; + + private ConcurrentInputVertexManagerConfigBuilder(@Nullable Configuration conf) { + if (conf == null) { + this.conf = new Configuration(false); + } else { + this.conf = conf; + } + } + + public VertexManagerPluginDescriptor build() { + VertexManagerPluginDescriptor desc = + VertexManagerPluginDescriptor.create( + VertexManagerWithConcurrentInput.class.getName()); + + try { + return desc.setUserPayload(TezUtils + .createUserPayloadFromConf(this.conf)); + } catch (IOException e) { + throw new TezUncheckedException(e); + } + } + } + +} diff --git a/tez-runtime-library/src/main/java/org/apache/tez/http/HttpConnection.java b/tez-runtime-library/src/main/java/org/apache/tez/http/HttpConnection.java index 9bfe4e7099..0a4306a866 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/http/HttpConnection.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/http/HttpConnection.java @@ -19,7 +19,7 @@ package org.apache.tez.http; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.IOUtils; import org.apache.tez.common.security.JobTokenSecretManager; @@ -49,7 +49,7 @@ public class HttpConnection extends BaseHttpConnection { @VisibleForTesting protected volatile HttpURLConnection connection; private volatile DataInputStream input; - private volatile boolean connectionSucceeed; + private volatile boolean connectionSucceed; private volatile boolean cleanup; private final JobTokenSecretManager jobTokenSecretMgr; @@ -77,9 +77,7 @@ public HttpConnection(URL url, HttpConnectionParams connParams, this.url = url; this.stopWatch = new StopWatch(); this.urlLogCount = new AtomicLong(); - if (LOG.isDebugEnabled()) { - LOG.debug("MapOutput URL :" + url.toString()); - } + LOG.debug("MapOutput URL :{}", url); } @VisibleForTesting @@ -149,7 +147,7 @@ private boolean connect(int connectionTimeout) throws IOException { long connectStartTime = System.currentTimeMillis(); try { connection.connect(); - connectionSucceeed = true; + connectionSucceed = true; break; } catch (IOException ioe) { // Don't attempt another connect if already cleanedup. 
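The connect(int) loop patched above keeps calling connection.connect() until it succeeds, the timeout budget runs out, or the connection has already been cleaned up. For illustration, the same shape as a compact standalone sketch (a hypothetical class, not the actual HttpConnection API):

```java
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;

// Bounded-retry connect: stop early if the caller already cleaned up,
// otherwise surface the last connect failure.
final class RetryingConnector {
  private volatile boolean cleanedUp = false;

  boolean connect(URL url, int attempts, int timeoutMs) throws IOException {
    IOException last = null;
    for (int i = 0; i < attempts; i++) {
      if (cleanedUp) {
        return false; // cleanup already happened; don't attempt another connect
      }
      HttpURLConnection conn = (HttpURLConnection) url.openConnection();
      conn.setConnectTimeout(timeoutMs);
      try {
        conn.connect();
        return true; // connected successfully
      } catch (IOException ioe) {
        last = ioe; // remember the failure and retry
      }
    }
    throw last != null ? last : new IOException("no connect attempt was made");
  }
}
```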
@@ -255,7 +253,7 @@ public void validate() throws IOException { @Override public DataInputStream getInputStream() throws IOException { stopWatch.reset().start(); - if (connectionSucceeed) { + if (connectionSucceed) { input = new DataInputStream(new BufferedInputStream( connection.getInputStream(), httpConnParams.getBufferSize())); } @@ -278,13 +276,11 @@ public void cleanup(boolean disconnect) throws IOException { stopWatch.reset().start(); try { if (input != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Closing input on " + logIdentifier); - } + LOG.debug("Closing input on {}", logIdentifier); input.close(); input = null; } - if (httpConnParams.isKeepAlive() && connectionSucceeed) { + if (httpConnParams.isKeepAlive() && connectionSucceed) { // Refer: // http://docs.oracle.com/javase/6/docs/technotes/guides/net/http-keepalive.html readErrorStream(connection.getErrorStream()); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/http/HttpConnectionParams.java b/tez-runtime-library/src/main/java/org/apache/tez/http/HttpConnectionParams.java index aac4bb3861..9f1c002a20 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/http/HttpConnectionParams.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/http/HttpConnectionParams.java @@ -76,7 +76,7 @@ public String toString() { sb.append("connectionTimeout=").append(connectionTimeout).append(", "); sb.append("readTimeout=").append(readTimeout).append(", "); sb.append("bufferSize=").append(bufferSize).append(", "); - sb.append("bufferSize=").append(bufferSize); + sb.append("sslShuffle=").append(sslShuffle); return sb.toString(); } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/http/SSLFactory.java b/tez-runtime-library/src/main/java/org/apache/tez/http/SSLFactory.java index e7a2dd0051..8c98764bfe 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/http/SSLFactory.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/http/SSLFactory.java @@ -18,7 +18,12 @@ package org.apache.tez.http; -import com.ning.http.client.AsyncHttpClientConfig; +import org.asynchttpclient.DefaultAsyncHttpClientConfig; + +import io.netty.handler.ssl.ClientAuth; +import io.netty.handler.ssl.JdkSslContext; +import io.netty.handler.ssl.SupportedCipherSuiteFilter; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -36,6 +41,7 @@ import java.io.IOException; import java.net.HttpURLConnection; import java.security.GeneralSecurityException; +import java.util.Objects; import static org.apache.hadoop.security.ssl.SSLFactory.KEYSTORES_FACTORY_CLASS_KEY; import static org.apache.hadoop.security.ssl.SSLFactory.SSL_CLIENT_CONF_KEY; @@ -76,22 +82,20 @@ public class SSLFactory implements ConnectionConfigurator { * @param mode SSLFactory mode, client or server. * @param conf Hadoop configuration from where the SSLFactory configuration * will be read. 
+ * @throws NullPointerException if {@code mode} or {@code conf} is {@code null} */ public SSLFactory(Mode mode, Configuration conf) { - this.conf = conf; - if (mode == null) { - throw new IllegalArgumentException("mode cannot be NULL"); - } - this.mode = mode; + this.conf = Objects.requireNonNull(conf); + this.mode = Objects.requireNonNull(mode, "mode cannot be NULL"); requireClientCert = conf.getBoolean(SSL_REQUIRE_CLIENT_CERT_KEY, DEFAULT_SSL_REQUIRE_CLIENT_CERT); - Configuration sslConf = readSSLConfiguration(mode); + // Rest of ssl configs are pre-populated in incoming conf payload + conf.setBoolean(SSL_REQUIRE_CLIENT_CERT_KEY, requireClientCert); Class klass = conf.getClass(KEYSTORES_FACTORY_CLASS_KEY, FileBasedKeyStoresFactory.class, KeyStoresFactory.class); - keystoresFactory = ReflectionUtils.newInstance(klass, sslConf); - + keystoresFactory = ReflectionUtils.newInstance(klass, conf); enabledProtocols = conf.getStrings(SSL_ENABLED_PROTOCOLS, DEFAULT_SSL_ENABLED_PROTOCOLS); } @@ -223,17 +227,20 @@ public HttpURLConnection configure(HttpURLConnection conn) throws IOException { } /** - * Set ssl context for {@link com.ning.http.client.AsyncHttpClientConfig.Builder} + * Set ssl context for {@link org.asynchttpclient.DefaultAsyncHttpClientConfig.Builder} * - * @param asyncNingBuilder {@link com.ning.http.client.AsyncHttpClientConfig.Builder} instance to + * @param builder {@link org.asynchttpclient.DefaultAsyncHttpClientConfig.Builder} instance to * configure. * @throws IOException if an IO error occurred. */ - public void configure(AsyncHttpClientConfig.Builder asyncNingBuilder) throws IOException { - if (asyncNingBuilder != null) { - asyncNingBuilder.setSSLContext(context); - asyncNingBuilder.setHostnameVerifier(getHostnameVerifier()); + public void configure(DefaultAsyncHttpClientConfig.Builder builder) throws IOException { + if (builder != null) { + JdkSslContext jdkSslContext = + new JdkSslContext(context, mode.equals(Mode.CLIENT), /* ciphers */null, + SupportedCipherSuiteFilter.INSTANCE, /* ApplicationProtocolConfig */ null, + requireClientCert ? 
ClientAuth.REQUIRE : ClientAuth.OPTIONAL, enabledProtocols, + /* startTls */ true); + builder.setSslContext(jdkSslContext); } } - } \ No newline at end of file diff --git a/tez-runtime-library/src/main/java/org/apache/tez/http/async/netty/AsyncHttpConnection.java b/tez-runtime-library/src/main/java/org/apache/tez/http/async/netty/AsyncHttpConnection.java index 735bb469c3..8f8a0f7d0b 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/http/async/netty/AsyncHttpConnection.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/http/async/netty/AsyncHttpConnection.java @@ -19,22 +19,23 @@ package org.apache.tez.http.async.netty; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.ning.http.client.AsyncHttpClient; -import com.ning.http.client.AsyncHttpClientConfig; -import com.ning.http.client.ListenableFuture; -import com.ning.http.client.Request; -import com.ning.http.client.RequestBuilder; -import com.ning.http.client.Response; +import org.apache.tez.common.Preconditions; +import org.asynchttpclient.AsyncHttpClient; +import org.asynchttpclient.DefaultAsyncHttpClient; +import org.asynchttpclient.DefaultAsyncHttpClientConfig; +import org.asynchttpclient.ListenableFuture; +import org.asynchttpclient.Request; +import org.asynchttpclient.RequestBuilder; +import org.asynchttpclient.Response; import org.apache.commons.io.IOUtils; import org.apache.tez.http.BaseHttpConnection; import org.apache.tez.http.HttpConnectionParams; import org.apache.tez.http.SSLFactory; import org.apache.tez.common.security.JobTokenSecretManager; -import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.common.security.SecureShuffleUtils; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader; import org.apache.tez.util.StopWatch; +import org.apache.tez.util.TezRuntimeShutdownHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -77,7 +78,7 @@ private void initClient(HttpConnectionParams httpConnParams) throws IOException synchronized (AsyncHttpConnection.class) { if (httpAsyncClient == null) { LOG.info("Initializing AsyncClient (TezBodyDeferringAsyncHandler)"); - AsyncHttpClientConfig.Builder builder = new AsyncHttpClientConfig.Builder(); + DefaultAsyncHttpClientConfig.Builder builder = new DefaultAsyncHttpClientConfig.Builder(); if (httpConnParams.isSslShuffle()) { //Configure SSL SSLFactory sslFactory = httpConnParams.getSslFactory(); @@ -92,17 +93,26 @@ private void initClient(HttpConnectionParams httpConnParams) throws IOException * setMaxConnections & addRequestFilter. 
*/ builder - .setAllowPoolingConnection(httpConnParams.isKeepAlive()) - .setAllowSslConnectionPool(httpConnParams.isKeepAlive()) - .setCompressionEnabled(false) + .setKeepAlive(httpConnParams.isKeepAlive()) + .setCompressionEnforced(false) //.setExecutorService(applicationThreadPool) - //.addRequestFilter(new ThrottleRequestFilter()) - .setMaximumConnectionsPerHost(1) - .setConnectionTimeoutInMs(httpConnParams.getConnectionTimeout()) - .setRequestTimeoutInMs(httpConnParams.getReadTimeout()) - .setUseRawUrl(true) + //.addRequestFilter(new ThrottleRequestFilter(1)) + .setMaxConnectionsPerHost(1) + .setConnectTimeout(httpConnParams.getConnectionTimeout()) + .setDisableUrlEncodingForBoundRequests(true) .build(); - httpAsyncClient = new AsyncHttpClient(builder.build()); + DefaultAsyncHttpClientConfig config = builder.build(); + httpAsyncClient = new DefaultAsyncHttpClient(config); + TezRuntimeShutdownHandler.addShutdownTask(() -> { + try { + if (httpAsyncClient != null) { + httpAsyncClient.close(); + httpAsyncClient = null; + } + } catch (IOException e) { + LOG.warn("Error while closing async client (this won't block shutdown)", e); + } + }); } } } @@ -114,9 +124,7 @@ public AsyncHttpConnection(URL url, HttpConnectionParams connParams, this.httpConnParams = connParams; this.url = url; this.stopWatch = new StopWatch(); - if (LOG.isDebugEnabled()) { - LOG.debug("MapOutput URL :" + url.toString()); - } + LOG.debug("MapOutput URL :{}", url); initClient(httpConnParams); pos = new PipedOutputStream(); @@ -148,7 +156,7 @@ public boolean connect() throws IOException, InterruptedException { Request request = rb.setUrl(url.toString()).build(); //for debugging - LOG.debug("Request url={}, encHash={}, id={}", url, encHash); + LOG.debug("Request url={}, encHash={}", url, encHash); try { //Blocks calling thread until it receives headers, but have the option to defer response body @@ -168,7 +176,7 @@ public boolean connect() throws IOException, InterruptedException { //verify the response int rc = response.getStatusCode(); if (rc != HttpURLConnection.HTTP_OK) { - LOG.debug("Request url={}, id={}", response.getUri()); + LOG.debug("Request url={}", response.getUri()); throw new IOException("Got invalid response code " + rc + " from " + url + ": " + response.getStatusText()); } @@ -210,7 +218,7 @@ public DataInputStream getInputStream() throws IOException, InterruptedException } @VisibleForTesting - public void close() { + public void close() throws IOException { httpAsyncClient.close(); httpAsyncClient = null; } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/http/async/netty/TezBodyDeferringAsyncHandler.java b/tez-runtime-library/src/main/java/org/apache/tez/http/async/netty/TezBodyDeferringAsyncHandler.java index 8e83eaca64..31dccd089e 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/http/async/netty/TezBodyDeferringAsyncHandler.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/http/async/netty/TezBodyDeferringAsyncHandler.java @@ -17,15 +17,16 @@ */ package org.apache.tez.http.async.netty; -import com.ning.http.client.AsyncHandler; -import com.ning.http.client.HttpResponseBodyPart; -import com.ning.http.client.HttpResponseHeaders; -import com.ning.http.client.HttpResponseStatus; -import com.ning.http.client.Response; +import org.asynchttpclient.AsyncHandler; +import org.asynchttpclient.HttpResponseBodyPart; +import org.asynchttpclient.HttpResponseStatus; +import org.asynchttpclient.Response; import org.apache.hadoop.classification.InterfaceAudience; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; +import io.netty.handler.codec.http.HttpHeaders; + import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; @@ -38,8 +39,8 @@ import java.util.concurrent.TimeUnit; /** - * Same as {@link com.ning.http.client.BodyDeferringAsyncHandler} with additional checks handle - * errors in getResponse(). Based on testing, at very high load {@link com.ning.http.client + * Same as {@link org.asynchttpclient.BodyDeferringAsyncHandler} with additional checks to handle + * errors in getResponse(). Based on testing, at very high load {@link org.asynchttpclient * .BodyDeferringAsyncHandler} gets to hung state in getResponse() as it tries to wait * indefinitely for headers to arrive. This class tries to fix the problem by waiting only for * the connection timeout. @@ -92,27 +93,28 @@ public void onThrowable(Throwable t) { } } - public AsyncHandler.STATE onStatusReceived(HttpResponseStatus responseStatus) throws Exception { + public AsyncHandler.State onStatusReceived(HttpResponseStatus responseStatus) throws Exception { responseBuilder.reset(); responseBuilder.accumulate(responseStatus); statusReceived = true; - return AsyncHandler.STATE.CONTINUE; + return AsyncHandler.State.CONTINUE; } - public AsyncHandler.STATE onHeadersReceived(HttpResponseHeaders headers) throws Exception { + @Override + public AsyncHandler.State onHeadersReceived(HttpHeaders headers) throws Exception { responseBuilder.accumulate(headers); - return AsyncHandler.STATE.CONTINUE; + return AsyncHandler.State.CONTINUE; } - public AsyncHandler.STATE onBodyPartReceived(HttpResponseBodyPart bodyPart) throws Exception { + public AsyncHandler.State onBodyPartReceived(HttpResponseBodyPart bodyPart) throws Exception { // body arrived, flush headers if (!responseSet) { response = responseBuilder.build(); responseSet = true; headersArrived.countDown(); } - bodyPart.writeTo(output); - return AsyncHandler.STATE.CONTINUE; + output.write(bodyPart.getBodyPartBytes()); + return AsyncHandler.State.CONTINUE; } protected void closeOut() throws IOException { @@ -154,7 +156,7 @@ public Response onCompleted() throws IOException { } /** - * This method -- unlike Future.get() -- will block only as long, + * This method -- unlike Future.get() -- will block only as long * as headers arrive. This is useful for large transfers, to examine headers * ASAP, and defer body streaming to it's fine destination and prevent * unneeded bandwidth consumption.
The response here will contain the very diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/TezRuntimeConfiguration.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/TezRuntimeConfiguration.java index 23f1f9bac7..de28286d9b 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/TezRuntimeConfiguration.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/api/TezRuntimeConfiguration.java @@ -47,23 +47,22 @@ @Public @Evolving @ConfigurationClass(templateFileName = "tez-runtime-default-template.xml") -public class TezRuntimeConfiguration { +public final class TezRuntimeConfiguration { private static final String TEZ_RUNTIME_PREFIX = "tez.runtime."; - private static final Set<String> tezRuntimeKeys = new HashSet<String>(); - private static Set<String> umnodifiableTezRuntimeKeySet; - private static final Set<String> otherKeys = new HashSet<String>(); - private static Set<String> unmodifiableOtherKeySet; - private static Configuration defaultConf = new Configuration(false); - private static final Map<String, String> tezRuntimeConfMap = new HashMap<String, String>(); - private static final Map<String, String> otherConfMap = new HashMap<String, String>(); + private static final Set<String> TEZ_RUNTIME_KEYS = new HashSet<>(); + private static final Set<String> UNMODIFIABLE_TEZ_RUNTIME_KEY_SET; + private static final Set<String> OTHER_KEYS = new HashSet<>(); + private static final Set<String> UNMODIFIABLE_OTHER_KEY_SET; + private static final Configuration DEFAULT_CONF = new Configuration(false); + private static final Map<String, String> TEZ_RUNTIME_CONF_MAP = new HashMap<>(); + private static final Map<String, String> OTHER_CONF_MAP = new HashMap<>(); /** * Prefixes from Hadoop configuration which are allowed. */ - private static final List<String> allowedPrefixes = new ArrayList<String>(); - private static List<String> unmodifiableAllowedPrefixes; + private static final List<String> ALLOWED_PREFIXES = new ArrayList<>(); static { @@ -71,6 +70,8 @@ public class TezRuntimeConfiguration { TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_SORT_THREADS); } + private TezRuntimeConfiguration() {} + /** * Configuration key to enable/disable IFile readahead. */ @@ -238,10 +239,19 @@ public class TezRuntimeConfiguration { "shuffle.fetch.failures.limit"; public static final int TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT_DEFAULT = 5; + /** + * Specifies in milliseconds the maximum delay a penalized host can have before being retried; + * defaults to 10 minutes. + */ + @ConfigurationProperty(type = "integer") + public static final String TEZ_RUNTIME_SHUFFLE_HOST_PENALTY_TIME_LIMIT_MS = TEZ_RUNTIME_PREFIX + + "shuffle.host.penalty.time.limit"; + public static final int TEZ_RUNTIME_SHUFFLE_HOST_PENALTY_TIME_LIMIT_MS_DEFAULT = 600000; + @Private @Unstable @ConfigurationProperty(type = "integer") - /** + /* * Expert setting made available only for debugging. Do not change it. Sets * the number of retries before giving up on downloading from source * attempt by consumer. Code internally handles the threshold if set to -1. @@ -255,7 +265,7 @@ public class TezRuntimeConfiguration { @Private @Unstable @ConfigurationProperty(type = "float") - /** + /* * Expert setting made available only for debugging. Do not change it. Setting * to determine if failures happened across a percentage of nodes. This * helps in determining if the consumer has to be restarted on continuous @@ -271,7 +281,7 @@ public class TezRuntimeConfiguration { @Private @Unstable @ConfigurationProperty(type = "integer") - /** + /* * Expert setting made available only for debugging. Do not change it.
Setting * to determine if the consumer has to be restarted on continuous * failures across nodes. Used along with {@link @@ -285,7 +295,7 @@ public class TezRuntimeConfiguration { @Private @Unstable @ConfigurationProperty(type = "float") - /** + /* * Expert setting made available only for debugging. Do not change it. * Maximum percentage of time (compared to overall progress), the fetcher is * allowed before concluding that it is stalled. @@ -298,7 +308,7 @@ public class TezRuntimeConfiguration { @Private @Unstable @ConfigurationProperty(type = "float") - /** + /* * Expert setting made available only for debugging. Do not change it. * Fraction to determine whether the shuffle has progressed enough or not * If it has not progressed enough, it could be qualified for the consumer. @@ -312,7 +322,7 @@ public class TezRuntimeConfiguration { @Private @Unstable @ConfigurationProperty(type = "float") - /** + /* * Expert setting made available only for debugging. Do not change it. * Provides threshold for determining whether fetching has to be marked * unhealthy based on the ratio of (failures/(failures+completed)) @@ -326,7 +336,7 @@ public class TezRuntimeConfiguration { @Private @Unstable @ConfigurationProperty(type = "boolean") - /** + /* * Expert setting made available only for debugging. Do not change it. * Provides threshold for determining whether fetching has to be marked * unhealthy based on the ratio of (failures/(failures+completed)) @@ -405,6 +415,29 @@ public class TezRuntimeConfiguration { public static final float TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT = 0.90f; + /** + * Enables fetch failures by a configuration. Should be used for testing only. + */ + @ConfigurationProperty(type = "boolean") + public static final String TEZ_RUNTIME_SHUFFLE_FETCH_ENABLE_TESTING_ERRORS = + TEZ_RUNTIME_PREFIX + "shuffle.fetch.testing.errors.enable"; + public static final boolean TEZ_RUNTIME_SHUFFLE_FETCH_ENABLE_TESTING_ERRORS_DEFAULT = false; + + /** + * Configures the injectable fetch failures, in a form of: + * maphost#mapvertex#probability#comma,separated,features + * Possible values are (fetch fails...): + * "*#*#50": from all map hosts with 50% likelihood + * "_first_#*#80": for the first ever seen map host with 80% likelihood (user doesn't want to use hostnames) + * "host1#*#100": from host1 with 100% likelihood (simulates single node failure) + * "host1#Map_1#100": from host1 for Map 1 source tasks with 100% likelihood + * "host1#Map_1#100#fail_only_first": as above but only for input attempts with index 0 + */ + @ConfigurationProperty() + public static final String TEZ_RUNTIME_SHUFFLE_FETCH_TESTING_ERRORS_CONFIG = + TEZ_RUNTIME_PREFIX + "shuffle.fetch.testing.errors.config"; + public static final String TEZ_RUNTIME_SHUFFLE_FETCH_TESTING_ERRORS_CONFIG_DEFAULT = "*#50"; + @ConfigurationProperty(type = "float") public static final String TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT = TEZ_RUNTIME_PREFIX + "shuffle.memory.limit.percent"; @@ -475,6 +508,26 @@ public class TezRuntimeConfiguration { "empty.partitions.info-via-events.enabled"; public static final boolean TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED_DEFAULT = true; + @Private + public static final String TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED = + TEZ_RUNTIME_PREFIX + "transfer.data-via-events.enabled"; + @Private + public static final boolean TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED_DEFAULT = true; + + @Private + public static final String TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE = + TEZ_RUNTIME_PREFIX + 
"transfer.data-via-events.max-size"; + @Private + public static final int TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE_DEFAULT = 512; + + @Private + public static final String TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_SUPPORT_IN_MEM_FILE = + TEZ_RUNTIME_PREFIX + "transfer.data-via-events.support.in-mem.file"; + + @Private + public static final boolean TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_SUPPORT_IN_MEM_FILE_DEFAULT + = true; + /** * If the shuffle input is on the local host bypass the http fetch and access the files directly */ @@ -503,9 +556,18 @@ public class TezRuntimeConfiguration { TEZ_RUNTIME_PREFIX + "enable.final-merge.in.output"; public static final boolean TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT_DEFAULT = true; + /** + * Expert level setting. How long should @link{ShuffleManager} wait for batching + * before sending the events in milliseconds. Set to -1 to not wait. + */ + @ConfigurationProperty(type = "integer") + public static final String TEZ_RUNTIME_SHUFFLE_BATCH_WAIT = + TEZ_RUNTIME_PREFIX + "shuffle.batch.wait"; + public static final int TEZ_RUNTIME_SHUFFLE_BATCH_WAIT_DEFAULT = -1; + /** - * Share data fetched between tasks running on the same host if applicable + * Share data fetched between tasks running on the same host if applicable. */ @ConfigurationProperty(type = "boolean") public static final String TEZ_RUNTIME_OPTIMIZE_SHARED_FETCH = TEZ_RUNTIME_PREFIX @@ -547,122 +609,130 @@ public class TezRuntimeConfiguration { public static final long TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT = 10000; static { - tezRuntimeKeys.add(TEZ_RUNTIME_IFILE_READAHEAD); - tezRuntimeKeys.add(TEZ_RUNTIME_IFILE_READAHEAD_BYTES); - tezRuntimeKeys.add(TEZ_RUNTIME_IO_FILE_BUFFER_SIZE); - tezRuntimeKeys.add(TEZ_RUNTIME_IO_SORT_FACTOR); - tezRuntimeKeys.add(TEZ_RUNTIME_SORT_SPILL_PERCENT); - tezRuntimeKeys.add(TEZ_RUNTIME_IO_SORT_MB); - tezRuntimeKeys.add(TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES); - tezRuntimeKeys.add(TEZ_RUNTIME_COMBINE_MIN_SPILLS); - tezRuntimeKeys.add(TEZ_RUNTIME_PIPELINED_SORTER_SORT_THREADS); - tezRuntimeKeys.add( + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_IFILE_READAHEAD); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_IFILE_READAHEAD_BYTES); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_IO_FILE_BUFFER_SIZE); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_IO_SORT_FACTOR); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SORT_SPILL_PERCENT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_IO_SORT_MB); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_INDEX_CACHE_MEMORY_LIMIT_BYTES); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_COMBINE_MIN_SPILLS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_PIPELINED_SORTER_SORT_THREADS); + TEZ_RUNTIME_KEYS.add( TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB); - tezRuntimeKeys.add(TEZ_RUNTIME_PIPELINED_SORTER_LAZY_ALLOCATE_MEMORY); - tezRuntimeKeys.add(TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB); - tezRuntimeKeys.add(TEZ_RUNTIME_UNORDERED_OUTPUT_MAX_PER_BUFFER_SIZE_BYTES); - tezRuntimeKeys.add(TEZ_RUNTIME_PARTITIONER_CLASS); - tezRuntimeKeys.add(TEZ_RUNTIME_COMBINER_CLASS); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_USE_ASYNC_HTTP); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_FETCH_MAX_TASK_OUTPUT_AT_ONCE); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_KEEP_ALIVE_ENABLED); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_KEEP_ALIVE_MAX_CONNECTIONS); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT); - 
tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_BUFFER_SIZE); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_ENABLE_SSL); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_FETCH_VERIFY_DISK_CHECKSUM); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM); - tezRuntimeKeys.add + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_PIPELINED_SORTER_LAZY_ALLOCATE_MEMORY); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_UNORDERED_OUTPUT_MAX_PER_BUFFER_SIZE_BYTES); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_PARTITIONER_CLASS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_COMBINER_CLASS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_USE_ASYNC_HTTP); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_PARALLEL_COPIES); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_FETCH_MAX_TASK_OUTPUT_AT_ONCE); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_KEEP_ALIVE_ENABLED); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_KEEP_ALIVE_MAX_CONNECTIONS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_BUFFER_SIZE); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_ENABLE_SSL); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_FETCH_VERIFY_DISK_CHECKSUM); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM); + TEZ_RUNTIME_KEYS.add (TEZ_RUNTIME_SHUFFLE_ACCEPTABLE_HOST_FETCH_FAILURE_FRACTION); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_MIN_FAILURES_PER_HOST); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_MAX_STALL_TIME_FRACTION); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_SOURCE_ATTEMPT_ABORT_LIMIT); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_MAX_ALLOWED_FAILED_FETCH_ATTEMPT_FRACTION); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_MIN_REQUIRED_PROGRESS_FRACTION); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_FAILED_CHECK_SINCE_LAST_COMPLETION); - tezRuntimeKeys.add(TEZ_RUNTIME_REPORT_PARTITION_STATS); - tezRuntimeKeys.add(TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT); - tezRuntimeKeys.add(TEZ_RUNTIME_GROUP_COMPARATOR_CLASS); - tezRuntimeKeys.add(TEZ_RUNTIME_INTERNAL_SORTER_CLASS); - tezRuntimeKeys.add(TEZ_RUNTIME_KEY_COMPARATOR_CLASS); - tezRuntimeKeys.add(TEZ_RUNTIME_KEY_CLASS); - tezRuntimeKeys.add(TEZ_RUNTIME_VALUE_CLASS); - tezRuntimeKeys.add(TEZ_RUNTIME_COMPRESS); - tezRuntimeKeys.add(TEZ_RUNTIME_COMPRESS_CODEC); - tezRuntimeKeys.add(TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS); - tezRuntimeKeys.add(TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED); - tezRuntimeKeys.add(TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED); - tezRuntimeKeys.add(TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT); - tezRuntimeKeys.add(TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS); - tezRuntimeKeys.add(TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH); - tezRuntimeKeys.add(TEZ_RUNTIME_OPTIMIZE_SHARED_FETCH); - tezRuntimeKeys.add(TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT); - tezRuntimeKeys.add(TEZ_RUNTIME_SORTER_CLASS); - tezRuntimeKeys.add(TEZ_RUNTIME_CLEANUP_FILES_ON_INTERRUPT); - 
tezRuntimeKeys.add(TEZ_RUNTIME_UNORDERED_PARTITIONED_KVWRITER_BUFFER_MERGE_PERCENT); - tezRuntimeKeys.add(TEZ_RUNTIME_SHUFFLE_FETCHER_USE_SHARED_POOL); - - defaultConf.addResource("core-default.xml"); - defaultConf.addResource("core-site.xml"); - defaultConf.addResource("tez-site.xml"); - - for (Map.Entry confEntry : defaultConf) { - if (tezRuntimeKeys.contains(confEntry.getKey())) { - tezRuntimeConfMap.put(confEntry.getKey(), confEntry.getValue()); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_MIN_FAILURES_PER_HOST); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_MAX_STALL_TIME_FRACTION); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_SOURCE_ATTEMPT_ABORT_LIMIT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_MAX_ALLOWED_FAILED_FETCH_ATTEMPT_FRACTION); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_MIN_REQUIRED_PROGRESS_FRACTION); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_FAILED_CHECK_SINCE_LAST_COMPLETION); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_FETCH_TESTING_ERRORS_CONFIG); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_FETCH_ENABLE_TESTING_ERRORS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_REPORT_PARTITION_STATS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_GROUP_COMPARATOR_CLASS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_INTERNAL_SORTER_CLASS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_KEY_COMPARATOR_CLASS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_KEY_CLASS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_VALUE_CLASS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_COMPRESS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_COMPRESS_CODEC); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_SUPPORT_IN_MEM_FILE); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_OPTIMIZE_SHARED_FETCH); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SORTER_CLASS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_CLEANUP_FILES_ON_INTERRUPT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_UNORDERED_PARTITIONED_KVWRITER_BUFFER_MERGE_PERCENT); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_FETCHER_USE_SHARED_POOL); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_HOST_PENALTY_TIME_LIMIT_MS); + TEZ_RUNTIME_KEYS.add(TEZ_RUNTIME_SHUFFLE_BATCH_WAIT); + + DEFAULT_CONF.addResource("core-default.xml"); + DEFAULT_CONF.addResource("core-site.xml"); + DEFAULT_CONF.addResource("tez-site.xml"); + + for (Map.Entry confEntry : DEFAULT_CONF) { + if (TEZ_RUNTIME_KEYS.contains(confEntry.getKey())) { + TEZ_RUNTIME_CONF_MAP.put(confEntry.getKey(), confEntry.getValue()); } else { // TODO TEZ-1232 Filter out parameters from TezConfiguration, and Task specific confs - otherConfMap.put(confEntry.getKey(), confEntry.getValue()); - otherKeys.add(confEntry.getKey()); + OTHER_CONF_MAP.put(confEntry.getKey(), confEntry.getValue()); + OTHER_KEYS.add(confEntry.getKey()); } } // Do NOT need all prefixes from the following list. Only specific ones are allowed - // "hadoop.", "hadoop.security", "io.", "fs.", "ipc.", "net.", "file.", "dfs.", "ha.", "s3.", "nfs3.", "rpc." 
- allowedPrefixes.add("io."); - allowedPrefixes.add("file."); - allowedPrefixes.add("fs."); - - umnodifiableTezRuntimeKeySet = Collections.unmodifiableSet(tezRuntimeKeys); - unmodifiableOtherKeySet = Collections.unmodifiableSet(otherKeys); - unmodifiableAllowedPrefixes = Collections.unmodifiableList(allowedPrefixes); + // "hadoop.", "hadoop.security", "io.", "fs.", "ipc.", "net.", "file.", "dfs.", "ha.", "s3.", "nfs3.", "rpc.", "ssl." + ALLOWED_PREFIXES.add("io."); + ALLOWED_PREFIXES.add("file."); + ALLOWED_PREFIXES.add("fs."); + ALLOWED_PREFIXES.add("ssl."); + + UMNODIFIABLE_TEZ_RUNTIME_KEY_SET = Collections.unmodifiableSet(TEZ_RUNTIME_KEYS); + UNMODIFIABLE_OTHER_KEY_SET = Collections.unmodifiableSet(OTHER_KEYS); + List unmodifiableAllowedPrefixes = Collections.unmodifiableList(ALLOWED_PREFIXES); } @Private public static Set getRuntimeConfigKeySet() { - return umnodifiableTezRuntimeKeySet; + return UMNODIFIABLE_TEZ_RUNTIME_KEY_SET; } @Private public static Set getRuntimeAdditionalConfigKeySet() { - return unmodifiableOtherKeySet; + return UNMODIFIABLE_OTHER_KEY_SET; } @Private public static List getAllowedPrefixes() { - return allowedPrefixes; + return ALLOWED_PREFIXES; } @Private public static Map getTezRuntimeConfigDefaults() { - return Collections.unmodifiableMap(tezRuntimeConfMap); + return Collections.unmodifiableMap(TEZ_RUNTIME_CONF_MAP); } @Private public static Map getOtherConfigDefaults() { - return Collections.unmodifiableMap(otherConfMap); + return Collections.unmodifiableMap(OTHER_CONF_MAP); } public enum ReportPartitionStats { @Deprecated - /** + /* * Don't report partition stats. It is the same as NONE. * It is defined to maintain backward compatibility given * Configuration @link{#TEZ_RUNTIME_REPORT_PARTITION_STATS} used @@ -671,7 +741,7 @@ public enum ReportPartitionStats { DISABLED("false"), @Deprecated - /** + /* * Report partition stats. It is the same as MEMORY_OPTIMIZED. 
* It is defined to maintain backward compatibility given * Configuration @link{#TEZ_RUNTIME_REPORT_PARTITION_STATS} used @@ -697,7 +767,7 @@ public enum ReportPartitionStats { private final String type; - private ReportPartitionStats(String type) { + ReportPartitionStats(String type) { this.type = type; } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductCombination.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductCombination.java index 8de8a026c0..c1eae7019e 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductCombination.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductCombination.java @@ -17,7 +17,7 @@ */ package org.apache.tez.runtime.library.cartesianproduct; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.primitives.Ints; import java.util.Arrays; diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductConfig.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductConfig.java index 7aac1d73ee..e47b083127 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductConfig.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductConfig.java @@ -18,7 +18,7 @@ package org.apache.tez.runtime.library.cartesianproduct; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.primitives.Ints; import com.google.protobuf.ByteString; import com.google.protobuf.InvalidProtocolBufferException; diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductEdgeManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductEdgeManager.java index a406c1ba56..0a3346ea16 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductEdgeManager.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductEdgeManager.java @@ -18,7 +18,7 @@ package org.apache.tez.runtime.library.cartesianproduct; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.protobuf.ByteString; import org.apache.tez.dag.api.EdgeManagerPluginContext; import org.apache.tez.dag.api.EdgeManagerPluginOnDemand; diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductVertexManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductVertexManager.java index ff2259302f..d1fcece722 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductVertexManager.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/cartesianproduct/CartesianProductVertexManager.java @@ -18,7 +18,7 @@ package org.apache.tez.runtime.library.cartesianproduct; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import 
org.apache.tez.common.Preconditions; import com.google.protobuf.ByteString; import org.apache.tez.dag.api.EdgeManagerPluginDescriptor; import org.apache.tez.dag.api.EdgeProperty; diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/CompositeInputAttemptIdentifier.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/CompositeInputAttemptIdentifier.java index 30295bd399..e07e687664 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/CompositeInputAttemptIdentifier.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/CompositeInputAttemptIdentifier.java @@ -18,6 +18,7 @@ package org.apache.tez.runtime.library.common; +import com.google.common.collect.Range; import org.apache.hadoop.classification.InterfaceAudience.Private; /** @@ -50,6 +51,14 @@ public InputAttemptIdentifier expand(int inputIdentifierOffset) { return new InputAttemptIdentifier(getInputIdentifier() + inputIdentifierOffset, getAttemptNumber(), getPathComponent(), isShared(), getFetchTypeInfo(), getSpillEventId()); } + public boolean includes(InputAttemptIdentifier thatInputAttemptIdentifier) { + Range<Integer> inputRange = + Range.closedOpen(super.getInputIdentifier(), super.getInputIdentifier() + inputIdentifierCount); + + return inputRange.contains(thatInputAttemptIdentifier.getInputIdentifier()) && + super.getAttemptNumber() == thatInputAttemptIdentifier.getAttemptNumber(); + } + // PathComponent & shared does not need to be part of the hashCode and equals computation. @Override public int hashCode() { @@ -63,6 +72,6 @@ public boolean equals(Object obj) { @Override public String toString() { - return super.toString(); + return super.toString() + ", count=" + inputIdentifierCount; } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/ConfigUtils.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/ConfigUtils.java index 6aa797fd0c..3bb85e33c4 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/ConfigUtils.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/ConfigUtils.java @@ -22,10 +22,9 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; -import com.google.common.base.Preconditions; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.RawComparator; @@ -38,7 +37,9 @@ @SuppressWarnings({"unchecked", "rawtypes"}) @InterfaceAudience.Private -public class ConfigUtils { +public final class ConfigUtils { + + private ConfigUtils() {} public static Class getIntermediateOutputCompressorClass( Configuration conf, Class defaultValue) { @@ -56,24 +57,6 @@ public static Class getIntermediateOutputCompressorC } return codecClass; } - - public static Class getIntermediateInputCompressorClass( - Configuration conf, Class defaultValue) { - Class codecClass = defaultValue; - String name = conf - .get(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC); - if (name != null) { - try { - codecClass = conf.getClassByName(name).asSubclass( - CompressionCodec.class); - } catch (ClassNotFoundException e) { - throw new IllegalArgumentException("Compression codec " + name - + " was not found.", e); - } - } - return codecClass; - } - // TODO Move defaults over to a constants file.
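A note on the CompositeInputAttemptIdentifier#includes() method added earlier in this patch: Range.closedOpen gives a half-open interval, so a composite identifier spanning inputIdentifierCount inputs matches exactly the identifiers [inputIdentifier, inputIdentifier + inputIdentifierCount) for the same attempt number. A minimal sketch of the bounds check (the literal values below are hypothetical, chosen only to illustrate the semantics):

    import com.google.common.collect.Range;

    class RangeCheckSketch {
      public static void main(String[] args) {
        // A composite starting at input 10 and spanning 4 inputs covers 10, 11, 12 and 13.
        Range<Integer> inputRange = Range.closedOpen(10, 10 + 4);
        System.out.println(inputRange.contains(13)); // true: last covered input
        System.out.println(inputRange.contains(14)); // false: the upper bound is exclusive
        // includes() additionally requires equal attempt numbers, so an identifier for
        // input 12 of attempt 1 is not included by a composite built for attempt 0.
      }
    }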
@@ -82,37 +65,28 @@ public static boolean shouldCompressIntermediateOutput(Configuration conf) { TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false); } - public static boolean isIntermediateInputCompressed(Configuration conf) { - return conf.getBoolean( - TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false); - } - public static <V> Class<V> getIntermediateOutputValueClass(Configuration conf) { - Class<V> retv = (Class<V>) conf.getClass( + return (Class<V>) conf.getClass( TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, null, Object.class); - return retv; } public static <V> Class<V> getIntermediateInputValueClass(Configuration conf) { - Class<V> retv = (Class<V>) conf.getClass( + return (Class<V>) conf.getClass( TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, null, Object.class); - return retv; } public static <K> Class<K> getIntermediateOutputKeyClass(Configuration conf) { - Class<K> retv = (Class<K>) conf.getClass( + return (Class<K>) conf.getClass( TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, null, Object.class); - return retv; } public static <K> Class<K> getIntermediateInputKeyClass(Configuration conf) { - Class<K> retv = (Class<K>) conf.getClass( + return (Class<K>) conf.getClass( TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, null, Object.class); - return retv; } public static <K> RawComparator<K> getIntermediateOutputKeyComparator(Configuration conf) { @@ -122,7 +96,7 @@ public static <K> RawComparator<K> getIntermediateOutputKeyComparator(Configurat if (theClass != null) return ReflectionUtils.newInstance(theClass, conf); return WritableComparator.get(getIntermediateOutputKeyClass(conf).asSubclass( - WritableComparable.class)); + WritableComparable.class), conf); } public static <K> RawComparator<K> getIntermediateInputKeyComparator(Configuration conf) { @@ -132,7 +106,7 @@ public static <K> RawComparator<K> getIntermediateInputKeyComparator(Configurati if (theClass != null) return ReflectionUtils.newInstance(theClass, conf); return WritableComparator.get(getIntermediateInputKeyClass(conf).asSubclass( - WritableComparable.class)); + WritableComparable.class), conf); } @@ -157,9 +131,9 @@ public static boolean useNewApi(Configuration conf) { @InterfaceAudience.Private public static Map<String, String> extractConfigurationMap(Map<String, String> confMap, Set<String> allowedKeys) { - Preconditions.checkArgument(confMap != null, "ConfMap cannot be null"); - Preconditions.checkArgument(allowedKeys != null, "Valid key set cannot be empty"); - Map<String, String> map = new HashMap<String, String>(); + Objects.requireNonNull(confMap, "ConfMap cannot be null"); + Objects.requireNonNull(allowedKeys, "Valid key set cannot be empty"); + Map<String, String> map = new HashMap<>(); for (Map.Entry<String, String> entry : confMap.entrySet()) { if (allowedKeys.contains(entry.getKey())) { map.put(entry.getKey(), entry.getValue()); @@ -170,8 +144,8 @@ public static Map<String, String> extractConfigurationMap(Map<String, String> co @InterfaceAudience.Private public static void addConfigMapToConfiguration(Configuration conf, Map<String, String> confMap) { - Preconditions.checkArgument(conf != null, "Configuration cannot be null"); - Preconditions.checkArgument(confMap != null, "Configuration map cannot be null"); + Objects.requireNonNull(conf, "Configuration cannot be null"); + Objects.requireNonNull(confMap, "Configuration map cannot be null"); for (Map.Entry<String, String> entry : confMap.entrySet()) { conf.set(entry.getKey(), entry.getValue()); } @@ -181,9 +155,9 @@ public static void addConfigMapToConfiguration(Configuration conf, Map<String, String> confMap) { @InterfaceAudience.Private public static Map<String, String> extractConfigurationMap(Map<String, String> confMap, List<Set<String>> validKeySets, List<String> allowedPrefixes) { - Preconditions.checkArgument(confMap != null, "ConfMap cannot be null"); - Preconditions.checkArgument(validKeySets != null, "Valid key set cannot be empty"); - Preconditions.checkArgument(allowedPrefixes != null, "Allowed prefixes cannot be null"); + Objects.requireNonNull(confMap, "ConfMap cannot be null"); + Objects.requireNonNull(validKeySets, "Valid key set cannot be empty"); + Objects.requireNonNull(allowedPrefixes, "Allowed prefixes cannot be null"); return extractConfigurationMapInternal(confMap.entrySet(), validKeySets, allowedPrefixes); } @@ -192,17 +166,17 @@ public static Map<String, String> extractConfigurationMap(Map<String, String> co public static Map<String, String> extractConfigurationMap(Configuration conf, List<Set<String>> validKeySets, List<String> allowedPrefixes) { - Preconditions.checkArgument(conf != null, "conf cannot be null"); - Preconditions.checkArgument(validKeySets != null, "Valid key set cannot be empty"); - Preconditions.checkArgument(allowedPrefixes != null, "Allowed prefixes cannot be null"); + Objects.requireNonNull(conf, "conf cannot be null"); + Objects.requireNonNull(validKeySets, "Valid key set cannot be empty"); + Objects.requireNonNull(allowedPrefixes, "Allowed prefixes cannot be null"); return extractConfigurationMapInternal(conf, validKeySets, allowedPrefixes); } @InterfaceAudience.Private public static boolean doesKeyQualify(String key, List<Set<String>> validKeySets, List<String> allowedPrefixes) { - Preconditions.checkArgument(key != null, "key cannot be null"); - Preconditions.checkArgument(validKeySets != null, "Valid key set cannot be empty"); - Preconditions.checkArgument(allowedPrefixes != null, "Allowed prefixes cannot be null"); + Objects.requireNonNull(key, "key cannot be null"); + Objects.requireNonNull(validKeySets, "Valid key set cannot be empty"); + Objects.requireNonNull(allowedPrefixes, "Allowed prefixes cannot be null"); for (Set<String> set : validKeySets) { if (set.contains(key)) { return true; @@ -218,8 +192,8 @@ public static boolean doesKeyQualify(String key, List<Set<String>> validKeySets, @InterfaceAudience.Private public static void mergeConfsWithExclusions(Configuration destConf, Map<String, String> srcConf, Set<String> excludedKeySet) { - Preconditions.checkState(destConf != null, "Destination conf cannot be null"); - Preconditions.checkState(srcConf != null, "Source conf cannot be null"); + Objects.requireNonNull(destConf, "Destination conf cannot be null"); + Objects.requireNonNull(srcConf, "Source conf cannot be null"); for (Map.Entry<String, String> entry : srcConf.entrySet()) { if (!excludedKeySet.contains(entry.getKey())) { destConf.set(entry.getKey(), entry.getValue()); @@ -228,8 +202,8 @@ public static void mergeConfsWithExclusions(Configuration destConf, Map<String, String> srcConf, public static void mergeConfs(Configuration destConf, Configuration srcConf) { - Preconditions.checkState(destConf != null, "Destination conf cannot be null"); - Preconditions.checkState(srcConf != null, "Source conf cannot be null"); + Objects.requireNonNull(destConf, "Destination conf cannot be null"); + Objects.requireNonNull(srcConf, "Source conf cannot be null"); for (Map.Entry<String, String> entry : srcConf) { // Explicit get to have parameter replacement work.
String val = srcConf.get(entry.getKey()); @@ -239,11 +213,11 @@ public static void mergeConfs(Configuration destConf, Configuration srcConf) { private static Map<String, String> extractConfigurationMapInternal( Iterable<Map.Entry<String, String>> iterable, List<Set<String>> validKeySets, List<String> allowedPrefixes) { - Set<String> validKeys = new HashSet<String>(); + Set<String> validKeys = new HashSet<>(); for (Set<String> set : validKeySets) { validKeys.addAll(set); } - Map<String, String> localConfMap = new HashMap<String, String>(); + Map<String, String> localConfMap = new HashMap<>(); for (Map.Entry<String, String> entry : iterable) { if (validKeys.contains(entry.getKey())) { localConfMap.put(entry.getKey(), entry.getValue()); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/Constants.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/Constants.java index 81921b2e85..e6cf73944f 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/Constants.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/Constants.java @@ -20,7 +20,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; @Private -public class Constants { +public final class Constants { // TODO NEWTEZ Check which of these constants are expecting specific pieces of information which are being removed - like taskAttemptId @@ -64,4 +64,5 @@ public class Constants { public static final String TEZ_RUNTIME_TASK_OUTPUT_MANAGER = "tez.runtime.task.local.output.manager"; + private Constants() {} } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/InputAttemptIdentifier.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/InputAttemptIdentifier.java index 16172e1daf..d1d5aeda1a 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/InputAttemptIdentifier.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/InputAttemptIdentifier.java @@ -108,6 +108,18 @@ public boolean canRetrieveInputInChunks() { (fetchTypeInfo == SPILL_INFO.FINAL_UPDATE.ordinal()); } + /** + * Checks whether this InputAttemptIdentifier includes the given InputAttemptIdentifier. + * It is used when obsoleting InputAttemptIdentifiers that include one reported in a fetch failure. + * + * @param thatInputAttemptIdentifier The InputAttemptIdentifier to check for inclusion. + * @return True if the current identifier includes the given one, false otherwise. + */ + public boolean includes(InputAttemptIdentifier thatInputAttemptIdentifier) { + return this.inputIdentifier == thatInputAttemptIdentifier.getInputIdentifier() && + this.attemptNumber == thatInputAttemptIdentifier.getAttemptNumber(); + } + // PathComponent & shared does not need to be part of the hashCode and equals computation.
@Override public int hashCode() { @@ -139,6 +151,6 @@ public boolean equals(Object obj) { public String toString() { return "InputAttemptIdentifier [inputIdentifier=" + inputIdentifier + ", attemptNumber=" + attemptNumber + ", pathComponent=" - + pathComponent + ", spillType=" + fetchTypeInfo + ", spillId=" + spillEventId +"]"; + + pathComponent + ", spillType=" + fetchTypeInfo + ", spillId=" + spillEventId + "]"; } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/MemoryUpdateCallbackHandler.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/MemoryUpdateCallbackHandler.java index 68f754d1e7..ff31b49984 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/MemoryUpdateCallbackHandler.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/MemoryUpdateCallbackHandler.java @@ -22,7 +22,7 @@ import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.tez.runtime.api.MemoryUpdateCallback; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; @Public @Evolving diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/TezRuntimeUtils.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/TezRuntimeUtils.java index 8e13c130f7..45eea0110b 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/TezRuntimeUtils.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/TezRuntimeUtils.java @@ -46,7 +46,7 @@ import org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles; @Private -public class TezRuntimeUtils { +public final class TezRuntimeUtils { private static final Logger LOG = LoggerFactory .getLogger(TezRuntimeUtils.class); @@ -55,6 +55,8 @@ public class TezRuntimeUtils { //ShufflePort by default for ContainerLaunchers public static final int INVALID_PORT = -1; + private TezRuntimeUtils() {} + public static String getTaskIdentifier(String vertexName, int taskIndex) { return String.format("%s_%06d", vertexName, taskIndex); } @@ -78,37 +80,26 @@ public static Combiner instantiateCombiner(Configuration conf, TaskContext taskC if (className == null) { return null; } - if (LOG.isDebugEnabled()) { - LOG.debug("Using Combiner class: " + className); - } + LOG.debug("Using Combiner class: {}", className); try { clazz = (Class) conf.getClassByName(className); } catch (ClassNotFoundException e) { throw new IOException("Unable to load combiner class: " + className); } - - Combiner combiner = null; - - Constructor ctor; - try { - ctor = clazz.getConstructor(TaskContext.class); - combiner = ctor.newInstance(taskContext); - } catch (SecurityException e) { - throw new IOException(e); - } catch (NoSuchMethodException e) { - throw new IOException(e); - } catch (IllegalArgumentException e) { - throw new IOException(e); - } catch (InstantiationException e) { - throw new IOException(e); - } catch (IllegalAccessException e) { - throw new IOException(e); - } catch (InvocationTargetException e) { - throw new IOException(e); - } - return combiner; + + Combiner combiner; + + Constructor ctor; + try { + ctor = clazz.getConstructor(TaskContext.class); + combiner = ctor.newInstance(taskContext); + } catch (SecurityException | NoSuchMethodException | IllegalArgumentException | InstantiationException + | IllegalAccessException | InvocationTargetException e) { + throw new IOException(e); + } + return combiner; } - + 
@SuppressWarnings("unchecked") public static Partitioner instantiatePartitioner(Configuration conf) throws IOException { @@ -125,31 +116,22 @@ public static Partitioner instantiatePartitioner(Configuration conf) LOG.debug("Using partitioner class: " + clazz.getName()); } - Partitioner partitioner = null; + Partitioner partitioner; try { Constructor ctorWithConf = clazz .getConstructor(Configuration.class); partitioner = ctorWithConf.newInstance(conf); - } catch (SecurityException e) { + } catch (SecurityException | IllegalArgumentException | InstantiationException | IllegalAccessException + | InvocationTargetException e) { throw new IOException(e); } catch (NoSuchMethodException e) { try { // Try a 0 argument constructor. partitioner = clazz.newInstance(); - } catch (InstantiationException e1) { - throw new IOException(e1); - } catch (IllegalAccessException e1) { + } catch (InstantiationException | IllegalAccessException e1) { throw new IOException(e1); } - } catch (IllegalArgumentException e) { - throw new IOException(e); - } catch (InstantiationException e) { - throw new IOException(e); - } catch (IllegalAccessException e) { - throw new IOException(e); - } catch (InvocationTargetException e) { - throw new IOException(e); } return partitioner; } @@ -160,10 +142,9 @@ public static TezTaskOutput instantiateTaskOutputManager(Configuration conf, Out try { Constructor ctor = clazz.getConstructor(Configuration.class, String.class, int.class); ctor.setAccessible(true); - TezTaskOutput instance = (TezTaskOutput) ctor.newInstance(conf, + return (TezTaskOutput) ctor.newInstance(conf, outputContext.getUniqueIdentifier(), outputContext.getDagIdentifier()); - return instance; } catch (Exception e) { throw new TezUncheckedException( "Unable to instantiate configured TezOutputFileManager: " @@ -185,7 +166,45 @@ public static URL constructBaseURIForShuffleHandlerDagComplete( sb.append("&job="); sb.append(appId.replace("application", "job")); sb.append("&dag="); - sb.append(String.valueOf(dagIdentifier)); + sb.append(dagIdentifier); + return new URL(sb.toString()); + } + + public static URL constructBaseURIForShuffleHandlerVertexComplete( + String host, int port, String appId, int dagIdentifier, String vertexIdentifier, boolean sslShuffle) + throws MalformedURLException { + String httpProtocol = (sslShuffle) ? "https://" : "http://"; + StringBuilder sb = new StringBuilder(httpProtocol); + sb.append(host); + sb.append(":"); + sb.append(port); + sb.append("/"); + sb.append("mapOutput?vertexAction=delete"); + sb.append("&job="); + sb.append(appId.replace("application", "job")); + sb.append("&dag="); + sb.append(dagIdentifier); + sb.append("&vertex="); + sb.append(vertexIdentifier); + return new URL(sb.toString()); + } + + public static URL constructBaseURIForShuffleHandlerTaskAttemptFailed( + String host, int port, String appId, int dagIdentifier, String taskAttemptIdentifier, boolean sslShuffle) + throws MalformedURLException { + String httpProtocol = (sslShuffle) ? 
"https://" : "http://"; + StringBuilder sb = new StringBuilder(httpProtocol); + sb.append(host); + sb.append(":"); + sb.append(port); + sb.append("/"); + sb.append("mapOutput?taskAttemptAction=delete"); + sb.append("&job="); + sb.append(appId.replace("application", "job")); + sb.append("&dag="); + sb.append(dagIdentifier); + sb.append("&map="); + sb.append(taskAttemptIdentifier); return new URL(sb.toString()); } @@ -233,10 +252,9 @@ public static HttpConnectionParams getHttpConnectionParams(Configuration conf) { } } - HttpConnectionParams httpConnParams = new HttpConnectionParams(keepAlive, + return new HttpConnectionParams(keepAlive, keepAliveMaxConnections, connectionTimeout, readTimeout, bufferSize, sslShuffle, sslFactory); - return httpConnParams; } public static BaseHttpConnection getHttpConnection(boolean asyncHttp, URL url, @@ -252,13 +270,9 @@ public static BaseHttpConnection getHttpConnection(boolean asyncHttp, URL url, public static int deserializeShuffleProviderMetaData(ByteBuffer meta) throws IOException { - DataInputByteBuffer in = new DataInputByteBuffer(); - try { + try (DataInputByteBuffer in = new DataInputByteBuffer()) { in.reset(meta); - int port = in.readInt(); - return port; - } finally { - in.close(); + return in.readInt(); } } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/ValuesIterator.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/ValuesIterator.java index 7add8c5ec0..4a75cbdd04 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/ValuesIterator.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/ValuesIterator.java @@ -33,7 +33,7 @@ import org.apache.tez.common.counters.TezCounter; import org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; /** * Iterates values while keys match in sorted input. 
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java index f4400dbcc7..60f70ac271 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/readers/UnorderedKVReader.java @@ -221,10 +221,10 @@ public IFile.Reader openIFileReader(FetchedInput fetchedInput) MemoryFetchedInput mfi = (MemoryFetchedInput) fetchedInput; return new InMemoryReader(null, mfi.getInputAttemptIdentifier(), - mfi.getBytes(), 0, (int) mfi.getActualSize()); + mfi.getBytes(), 0, (int) mfi.getSize()); } else { return new IFile.Reader(fetchedInput.getInputStream(), - fetchedInput.getCompressedSize(), codec, null, null, ifileReadAhead, + fetchedInput.getSize(), codec, null, null, ifileReadAhead, ifileReadAheadLength, ifileBufferSize); } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/security/SecureShuffleUtils.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/security/SecureShuffleUtils.java index 67b8de27c5..7e5f3af60f 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/security/SecureShuffleUtils.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/security/SecureShuffleUtils.java @@ -31,19 +31,20 @@ import org.apache.tez.common.security.JobTokenSecretManager; /** - * - * utilities for generating kyes, hashes and verifying them for shuffle + * + * utilities for generating keys, hashes and verifying them for shuffle * */ @InterfaceAudience.Private @InterfaceStability.Unstable -public class SecureShuffleUtils { +public final class SecureShuffleUtils { public static final String HTTP_HEADER_URL_HASH = "UrlHash"; public static final String HTTP_HEADER_REPLY_URL_HASH = "ReplyHash"; - + + private SecureShuffleUtils() {} + /** * Base64 encoded hash of msg - * @param msg */ public static String generateHash(byte[] msg, SecretKey key) { return new String(Base64.encodeBase64(generateByteHash(msg, key)), Charsets.UTF_8); @@ -51,7 +52,7 @@ public static String generateHash(byte[] msg, SecretKey key) { /** * calculate hash of msg - * @param msg + * * @return byte array containing computed hash of message */ private static byte[] generateByteHash(byte[] msg, SecretKey key) { @@ -63,9 +64,6 @@ private static byte[] generateByteHash(byte[] msg, SecretKey key) { * This is only meant to be used when a process needs to verify against multiple different keys * (ShuffleHandler for instance) * - * @param hash - * @param msg - * @param key * @return true when hashes match; false otherwise */ private static boolean verifyHash(byte[] hash, byte[] msg, SecretKey key) { @@ -75,9 +73,7 @@ private static boolean verifyHash(byte[] hash, byte[] msg, SecretKey key) { /** * verify that hash equals to HMacHash(msg) - * @param hash - * @param msg - * @param mgr JobTokenSecretManager + * * @return true when hashes match; false otherwise */ private static boolean verifyHash(byte[] hash, byte[] msg, JobTokenSecretManager mgr) { @@ -87,14 +83,10 @@ private static boolean verifyHash(byte[] hash, byte[] msg, JobTokenSecretManager /** * Aux util to calculate hash of a String - * @param enc_str - * @param mgr JobTokenSecretManager - * @return Base64 encodedHash - * @throws IOException + * */ - public static String hashFromString(String 
enc_str, JobTokenSecretManager mgr) - throws IOException { - return new String(Base64.encodeBase64(mgr.computeHash(enc_str.getBytes(Charsets.UTF_8))), Charsets.UTF_8); + public static String hashFromString(String encStr, JobTokenSecretManager mgr) { + return new String(Base64.encodeBase64(mgr.computeHash(encStr.getBytes(Charsets.UTF_8))), Charsets.UTF_8); } /** @@ -106,13 +98,12 @@ public static String hashFromString(String enc_str, JobTokenSecretManager mgr) * @param base64Hash base64 encoded hash * @param msg the message * @param key the key to use to generate the hash from the message - * @throws IOException */ public static void verifyReply(String base64Hash, String msg, SecretKey key) throws IOException { byte[] hash = Base64.decodeBase64(base64Hash.getBytes(Charsets.UTF_8)); boolean res = verifyHash(hash, msg.getBytes(Charsets.UTF_8), key); - if(res != true) { + if(!res) { throw new IOException("Verification of the hashReply failed"); } } @@ -120,7 +111,7 @@ public static void verifyReply(String base64Hash, String msg, SecretKey key) thr /** * verify that base64Hash is same as HMacHash(msg) * @param base64Hash (Base64 encoded hash) - * @param msg + * @param msg the message * @throws IOException if not the same */ public static void verifyReply(String base64Hash, String msg, JobTokenSecretManager mgr) @@ -129,14 +120,14 @@ public static void verifyReply(String base64Hash, String msg, JobTokenSecretMana boolean res = verifyHash(hash, msg.getBytes(Charsets.UTF_8), mgr); - if(res != true) { + if(!res) { throw new IOException("Verification of the hashReply failed"); } } - + /** * Shuffle specific utils - build string for encoding from URL - * @param url + * * @return string for encoding */ public static String buildMsgFrom(URL url) { @@ -145,11 +136,10 @@ public static String buildMsgFrom(URL url) { /** * Shuffle specific utils - build string for encoding from URL - * @param uri_path - * @param uri_query + * * @return string for encoding */ private static String buildMsgFrom(String uri_path, String uri_query, int port) { - return String.valueOf(port) + uri_path + "?" + uri_query; + return port + uri_path + "?" + uri_query; } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/serializer/SerializationContext.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/serializer/SerializationContext.java new file mode 100644 index 0000000000..2398b8f930 --- /dev/null +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/serializer/SerializationContext.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tez.runtime.library.common.serializer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.serializer.Serialization; +import org.apache.hadoop.io.serializer.SerializationFactory; +import org.apache.hadoop.io.serializer.Serializer; +import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; +import org.apache.tez.runtime.library.common.ConfigUtils; + +/** + * SerializationContext is a wrapper class for serialization related fields. + */ +public class SerializationContext { + + private Class keyClass; + private Class valueClass; + private Serialization keySerialization; + private Serialization valSerialization; + + public SerializationContext(Configuration conf) { + this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf); + this.valueClass = ConfigUtils.getIntermediateInputValueClass(conf); + SerializationFactory serializationFactory = new SerializationFactory(conf); + if (keyClass != null) { + this.keySerialization = serializationFactory.getSerialization(keyClass); + } + if (valueClass != null) { + this.valSerialization = serializationFactory.getSerialization(valueClass); + } + } + + public SerializationContext(Class keyClass, Class valueClass, + Serialization keySerialization, Serialization valSerialization) { + this.keyClass = keyClass; + this.valueClass = valueClass; + this.keySerialization = keySerialization; + this.valSerialization = valSerialization; + } + + public Class getKeyClass() { + return keyClass; + } + + public Class getValueClass() { + return valueClass; + } + + public Serialization getKeySerialization() { + return keySerialization; + } + + public Serialization getValSerialization() { + return valSerialization; + } + + @SuppressWarnings({ "rawtypes", "unchecked" }) + public Serializer getKeySerializer() { + return keySerialization.getSerializer((Class) keyClass); + } + + @SuppressWarnings({ "rawtypes", "unchecked" }) + public Serializer getValueSerializer() { + return valSerialization.getSerializer((Class) valueClass); + } + + public void applyToConf(Configuration conf) { + conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, keyClass.getName()); + conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, valueClass.getName()); + } +} diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/DiskFetchedInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/DiskFetchedInput.java index c873af7140..22b2899e1e 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/DiskFetchedInput.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/DiskFetchedInput.java @@ -31,7 +31,7 @@ import org.apache.tez.runtime.library.common.InputAttemptIdentifier; import org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; public class DiskFetchedInput extends FetchedInput { @@ -40,21 +40,33 @@ public class DiskFetchedInput extends FetchedInput { private final FileSystem localFS; private final Path tmpOutputPath; private final Path outputPath; + private final long size; - public DiskFetchedInput(long actualSize, long compressedSize, + public DiskFetchedInput(long compressedSize, InputAttemptIdentifier inputAttemptIdentifier, FetchedInputCallback callbackHandler, Configuration conf, LocalDirAllocator localDirAllocator, TezTaskOutputFiles filenameAllocator) throws IOException { - 
super(Type.DISK, actualSize, compressedSize, inputAttemptIdentifier, callbackHandler); + super(inputAttemptIdentifier, callbackHandler); + this.size = compressedSize; this.localFS = FileSystem.getLocal(conf).getRaw(); this.outputPath = filenameAllocator.getInputFileForWrite( - this.inputAttemptIdentifier.getInputIdentifier(), this - .inputAttemptIdentifier.getSpillEventId(), actualSize); + this.getInputAttemptIdentifier().getInputIdentifier(), this + .getInputAttemptIdentifier().getSpillEventId(), this.size); // Files are not clobbered due to the id being appended to the outputPath in the tmpPath, // otherwise fetches for the same task but from different attempts would clobber each other. - this.tmpOutputPath = outputPath.suffix(String.valueOf(id)); + this.tmpOutputPath = outputPath.suffix(String.valueOf(getId())); + } + + @Override + public Type getType() { + return Type.DISK; + } + + @Override + public long getSize() { + return size; } @Override @@ -68,7 +80,7 @@ public InputStream getInputStream() throws IOException { } public final Path getInputPath() { - if (state == State.COMMITTED) { + if (isState(State.COMMITTED)) { return this.outputPath; } return this.tmpOutputPath; @@ -76,8 +88,8 @@ public final Path getInputPath() { @Override public void commit() throws IOException { - if (state == State.PENDING) { - state = State.COMMITTED; + if (isState(State.PENDING)) { + setState(State.COMMITTED); localFS.rename(tmpOutputPath, outputPath); notifyFetchComplete(); } @@ -85,8 +97,8 @@ public void commit() throws IOException { @Override public void abort() throws IOException { - if (state == State.PENDING) { - state = State.ABORTED; + if (isState(State.PENDING)) { + setState(State.ABORTED); // TODO NEWTEZ Maybe defer this to container cleanup localFS.delete(tmpOutputPath, false); notifyFetchFailure(); @@ -96,10 +108,10 @@ public void abort() throws IOException { @Override public void free() { Preconditions.checkState( - state == State.COMMITTED || state == State.ABORTED, + isState(State.COMMITTED) || isState(State.ABORTED), "FetchedInput can only be freed after it is committed or aborted"); - if (state == State.COMMITTED) { - state = State.FREED; + if (isState(State.COMMITTED)) { + setState(State.FREED); try { // TODO NEWTEZ Maybe defer this to container cleanup localFS.delete(outputPath, false); @@ -115,8 +127,8 @@ public void free() { @Override public String toString() { return "DiskFetchedInput [outputPath=" + outputPath - + ", inputAttemptIdentifier=" + inputAttemptIdentifier - + ", actualSize=" + actualSize + ",compressedSize=" + compressedSize - + ", type=" + type + ", id=" + id + ", state=" + state + "]"; + + ", inputAttemptIdentifier=" + getInputAttemptIdentifier() + + ", actualSize=" + getSize() + + ", type=" + getType() + ", id=" + getId() + ", state=" + getState() + "]"; } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetchedInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetchedInput.java index 3e740a020e..8982c27cad 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetchedInput.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetchedInput.java @@ -42,38 +42,52 @@ protected static enum State { private static AtomicInteger ID_GEN = new AtomicInteger(0); - protected InputAttemptIdentifier inputAttemptIdentifier; - protected final long actualSize; - protected final long compressedSize; - protected final Type 
type; - protected final FetchedInputCallback callback; - protected final int id; - protected State state; - - public FetchedInput(Type type, long actualSize, long compressedSize, - InputAttemptIdentifier inputAttemptIdentifier, + private InputAttemptIdentifier inputAttemptIdentifier; + private final FetchedInputCallback callback; + private final int id; + private byte state; + + protected FetchedInput(InputAttemptIdentifier inputAttemptIdentifier, FetchedInputCallback callbackHandler) { - this.type = type; - this.actualSize = actualSize; - this.compressedSize = compressedSize; this.inputAttemptIdentifier = inputAttemptIdentifier; this.callback = callbackHandler; this.id = ID_GEN.getAndIncrement(); - this.state = State.PENDING; + this.state = (byte) State.PENDING.ordinal(); } - public Type getType() { - return this.type; + public abstract Type getType(); + + protected boolean isState(State state) { + return this.state == (byte) state.ordinal(); } - public long getActualSize() { - return this.actualSize; + protected void setState(State state) { + this.state = (byte) state.ordinal(); } - - public long getCompressedSize() { - return this.compressedSize; + + protected State getState() { + if (isState(State.PENDING)) { + return State.PENDING; + } + if (isState(State.COMMITTED)) { + return State.COMMITTED; + } + if (isState(State.ABORTED)) { + return State.ABORTED; + } + if (isState(State.FREED)) { + return State.FREED; + } + // Should not get here + return null; } + protected int getId() { + return this.id; + } + + public abstract long getSize(); + public InputAttemptIdentifier getInputAttemptIdentifier() { return this.inputAttemptIdentifier; } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java index bf8c83b98e..6094e6bdb9 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/Fetcher.java @@ -58,6 +58,7 @@ import org.apache.tez.common.CallableWithNdc; import org.apache.tez.common.security.JobTokenSecretManager; import org.apache.tez.dag.api.TezUncheckedException; +import org.apache.tez.runtime.api.InputContext; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.common.Constants; import org.apache.tez.runtime.library.common.InputAttemptIdentifier; @@ -66,8 +67,9 @@ import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord; import org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException; import org.apache.tez.runtime.library.common.shuffle.FetchedInput.Type; - -import com.google.common.base.Preconditions; +import org.apache.tez.runtime.library.common.shuffle.api.ShuffleHandlerError; +import org.apache.tez.common.Preconditions; +import org.apache.tez.common.TezUtilsInternal; /** * Responsible for fetching inputs served by the ShuffleHandler for a single @@ -130,21 +132,21 @@ public String toString() { private boolean ifileReadAhead = TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT; private int ifileReadAheadLength = TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT; - + private final JobTokenSecretManager jobTokenSecretMgr; private final FetcherCallback fetcherCallback; private final FetchedInputAllocator inputManager; private final ApplicationId appId; private final int dagIdentifier; - + private final String 
logIdentifier; private final String localHostname; - + private final AtomicBoolean isShutDown = new AtomicBoolean(false); - private final int fetcherIdentifier; + protected final int fetcherIdentifier; // Parameters to track work. private List srcAttempts; @@ -171,7 +173,7 @@ public String getHost() { private URL url; private volatile DataInputStream input; - + BaseHttpConnection httpConnection; private HttpConnectionParams httpConnectionParams; @@ -192,9 +194,9 @@ public String getHost() { private final boolean isDebugEnabled = LOG.isDebugEnabled(); - private Fetcher(FetcherCallback fetcherCallback, HttpConnectionParams params, - FetchedInputAllocator inputManager, ApplicationId appId, int dagIdentifier, - JobTokenSecretManager jobTokenSecretManager, String srcNameTrimmed, Configuration conf, + protected Fetcher(FetcherCallback fetcherCallback, HttpConnectionParams params, + FetchedInputAllocator inputManager, InputContext inputContext, + JobTokenSecretManager jobTokenSecretManager, Configuration conf, RawLocalFileSystem localFs, LocalDirAllocator localDirAllocator, Path lockPath, @@ -207,8 +209,8 @@ private Fetcher(FetcherCallback fetcherCallback, HttpConnectionParams params, this.fetcherCallback = fetcherCallback; this.inputManager = inputManager; this.jobTokenSecretMgr = jobTokenSecretManager; - this.appId = appId; - this.dagIdentifier = dagIdentifier; + this.appId = inputContext.getApplicationId(); + this.dagIdentifier = inputContext.getDagIdentifier(); this.pathToAttemptMap = new HashMap(); this.httpConnectionParams = params; this.conf = conf; @@ -217,7 +219,10 @@ private Fetcher(FetcherCallback fetcherCallback, HttpConnectionParams params, this.sharedFetchEnabled = sharedFetchEnabled; this.fetcherIdentifier = fetcherIdGen.getAndIncrement(); - this.logIdentifier = " fetcher [" + srcNameTrimmed +"] " + fetcherIdentifier; + + String sourceDestNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName()) + " -> " + + TezUtilsInternal.cleanVertexName(inputContext.getTaskVertexName()); + this.logIdentifier = " fetcher [" + sourceDestNameTrimmed +"] " + fetcherIdentifier; this.localFs = localFs; this.localDirAllocator = localDirAllocator; @@ -235,6 +240,7 @@ private Fetcher(FetcherCallback fetcherCallback, HttpConnectionParams params, } } + // helper method to populate the remaining map void populateRemainingMap(List origlist) { if (srcAttemptsRemaining == null) { @@ -276,7 +282,8 @@ public FetchResult callInternal() throws Exception { HostFetchResult hostFetchResult; - if (localDiskFetchEnabled && host.equals(localHostname) && port == shufflePort) { + boolean isLocalFetch = localDiskFetchEnabled && host.equals(localHostname) && port == shufflePort; + if (isLocalFetch) { hostFetchResult = setupLocalDiskFetch(); } else if (multiplex) { hostFetchResult = doSharedFetch(); @@ -287,7 +294,7 @@ public FetchResult callInternal() throws Exception { if (hostFetchResult.failedInputs != null && hostFetchResult.failedInputs.length > 0) { if (!isShutDown.get()) { LOG.warn("copyInputs failed for tasks " + Arrays.toString(hostFetchResult.failedInputs)); - for (InputAttemptIdentifier left : hostFetchResult.failedInputs) { + for (InputAttemptFetchFailure left : hostFetchResult.failedInputs) { fetcherCallback.fetchFailed(host, left, hostFetchResult.connectFailed); } } else { @@ -338,7 +345,7 @@ public void cache(String host, DiskFetchedInput input = (DiskFetchedInput) fetchedInput; indexRec = new TezIndexRecord(0, decompressedLength, compressedLength); 
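
Before going further into Fetcher.java, a usage sketch for the SerializationContext class introduced a few files above. The two configuration keys are the same ones its applyToConf() writes back; Text is only a stand-in for any serializable key/value class.

    import java.io.IOException;
    import java.io.OutputStream;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.serializer.Serializer;
    import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
    import org.apache.tez.runtime.library.common.serializer.SerializationContext;

    // Minimal sketch: conf names the key/value classes, the context resolves a
    // matching Hadoop Serialization for each (WritableSerialization for Text).
    void serializeOneKey(OutputStream out) throws IOException {
      Configuration conf = new Configuration();
      conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
      conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, Text.class.getName());

      SerializationContext context = new SerializationContext(conf);
      Serializer keySerializer = context.getKeySerializer();
      keySerializer.open(out);
      keySerializer.serialize(new Text("key-0"));
      keySerializer.close();
    }
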
localFs.mkdirs(outputPath.getParent()); - // avoid pit-falls of speculation + // avoid pitfalls of speculation tmpPath = outputPath.suffix(tmpSuffix); // JDK7 - TODO: use Files implementation to speed up this process localFs.copyFromLocalFile(input.getInputPath(), tmpPath); @@ -357,7 +364,7 @@ public void cache(String host, } spillRec.putIndex(indexRec, 0); - spillRec.writeToFile(tmpIndex, conf); + spillRec.writeToFile(tmpIndex, conf, localFs); // everything went well so far - rename it boolean renamed = localFs.rename(tmpIndex, outputPath .suffix(Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING)); @@ -452,7 +459,7 @@ protected HostFetchResult doSharedFetch() throws IOException { srcAttemptsRemaining.values(), "Requeuing as we didn't get a lock"), null, false); } else { if (findInputs() == srcAttemptsRemaining.size()) { - // double checked after lock + // double-checked after lock releaseLock(lock); lock = null; return doLocalDiskFetch(true); @@ -487,8 +494,9 @@ protected HostFetchResult doHttpFetch() { } private HostFetchResult setupConnection(Collection attempts) { + StringBuilder baseURI = null; try { - StringBuilder baseURI = ShuffleUtils.constructBaseURIForShuffleHandler(host, + baseURI = ShuffleUtils.constructBaseURIForShuffleHandler(host, port, partition, partitionCount, appId.toString(), dagIdentifier, httpConnectionParams.isSslShuffle()); this.url = ShuffleUtils.constructInputURL(baseURI.toString(), attempts, httpConnectionParams.isKeepAlive()); @@ -503,7 +511,7 @@ private HostFetchResult setupConnection(Collection attem // ioErrs.increment(1); // If connect did not succeed, just mark all the maps as failed, // indirectly penalizing the host - InputAttemptIdentifier[] failedFetches = null; + InputAttemptFetchFailure[] failedFetches = null; if (isShutDown.get()) { if (isDebugEnabled) { LOG.debug( @@ -511,8 +519,7 @@ private HostFetchResult setupConnection(Collection attem e.getClass().getName() + ", Message: " + e.getMessage()); } } else { - failedFetches = srcAttemptsRemaining.values(). 
- toArray(new InputAttemptIdentifier[srcAttemptsRemaining.values().size()]); + failedFetches = InputAttemptFetchFailure.fromAttempts(srcAttemptsRemaining.values()); } return new HostFetchResult(new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values()), failedFetches, true); } @@ -526,8 +533,7 @@ private HostFetchResult setupConnection(Collection attem } try { - input = httpConnection.getInputStream(); - httpConnection.validate(); + setupConnectionInternal(host, attempts); //validateConnectionResponse(msgToEncode, encHash); } catch (IOException e) { // ioErrs.increment(1); @@ -542,10 +548,10 @@ private HostFetchResult setupConnection(Collection attem } } else { InputAttemptIdentifier firstAttempt = attempts.iterator().next(); - LOG.warn("Fetch Failure from host while connecting: " + host + ", attempt: " + firstAttempt - + " Informing ShuffleManager: ", e); + LOG.warn("FETCH_FAILURE: Fetch Failure while connecting from {} to: {}:{}, attempt: {}, url: {}" + + " Informing ShuffleManager", localHostname, host, port, firstAttempt, baseURI, e); return new HostFetchResult(new FetchResult(host, port, partition, partitionCount, srcAttemptsRemaining.values()), - new InputAttemptIdentifier[] { firstAttempt }, false); + new InputAttemptFetchFailure[] { new InputAttemptFetchFailure(firstAttempt) }, true); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); //reset status @@ -554,6 +560,13 @@ private HostFetchResult setupConnection(Collection attem return null; } + + protected void setupConnectionInternal(String host, Collection attempts) + throws IOException, InterruptedException { + input = httpConnection.getInputStream(); + httpConnection.validate(); + } + @VisibleForTesting protected HostFetchResult doHttpFetch(CachingCallBack callback) { @@ -582,7 +595,7 @@ protected HostFetchResult doHttpFetch(CachingCallBack callback) { // On any error, faildTasks is not null and we exit // after putting back the remaining maps to the // yet_to_be_fetched list and marking the failed tasks. 
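
Note how failed inputs are now wrapped as InputAttemptFetchFailure (the class is added later in this patch, and FetcherCallback.fetchFailed is updated below to match). A hedged sketch of what a callback implementation can now distinguish; the branch bodies are invented for illustration:

    @Override
    public void fetchFailed(String host, InputAttemptFetchFailure failure, boolean connectFailed) {
      InputAttemptIdentifier attempt = failure.getInputAttemptIdentifier();
      if (failure.isDiskErrorAtSource()) {
        // The shuffle handler reported a disk error on its side, so blame the
        // source node rather than the network path between the two hosts.
      } else if (failure.isLocalFetch()) {
        // A same-node disk fetch failed; an HTTP fetch is the likely fallback.
      } else if (connectFailed) {
        // Could not connect at all: penalize the host for all pending inputs.
      }
    }
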
- InputAttemptIdentifier[] failedInputs = null; + InputAttemptFetchFailure[] failedInputs = null; while (!srcAttemptsRemaining.isEmpty() && failedInputs == null) { InputAttemptIdentifier inputAttemptIdentifier = srcAttemptsRemaining.entrySet().iterator().next().getValue(); @@ -658,7 +671,7 @@ private HostFetchResult doLocalDiskFetch(boolean failMissing) { idxRecord = getTezIndexRecord(srcAttemptId, reduceId); fetchedInput = new LocalDiskFetchedInput(idxRecord.getStartOffset(), - idxRecord.getRawLength(), idxRecord.getPartLength(), srcAttemptId, + idxRecord.getPartLength(), srcAttemptId, getShuffleInputFileName(srcAttemptId.getPathComponent(), null), conf, new FetchedInputCallback() { @@ -683,7 +696,7 @@ public void freeResources(FetchedInput fetchedInput) { long endTime = System.currentTimeMillis(); fetcherCallback.fetchSucceeded(host, srcAttemptId, fetchedInput, idxRecord.getPartLength(), idxRecord.getRawLength(), (endTime - startTime)); - } catch (IOException e) { + } catch (IOException | InternalError e) { hasFailures = true; cleanupFetchedInput(fetchedInput); if (isShutDown.get()) { @@ -709,7 +722,7 @@ public void freeResources(FetchedInput fetchedInput) { } } - InputAttemptIdentifier[] failedFetches = null; + InputAttemptFetchFailure[] failedFetches = null; if (failMissing && srcAttemptsRemaining.size() > 0) { if (isShutDown.get()) { if (isDebugEnabled) { @@ -718,8 +731,8 @@ public void freeResources(FetchedInput fetchedInput) { " remaining inputs"); } } else { - failedFetches = srcAttemptsRemaining.values(). - toArray(new InputAttemptIdentifier[srcAttemptsRemaining.values().size()]); + failedFetches = + InputAttemptFetchFailure.fromAttemptsLocalFetchFailure(srcAttemptsRemaining.values()); } } else { // nothing needs to be done to requeue remaining entries @@ -735,7 +748,7 @@ protected TezIndexRecord getTezIndexRecord(InputAttemptIdentifier srcAttemptId, Path indexFile = getShuffleInputFileName(srcAttemptId.getPathComponent(), Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING); - TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf); + TezSpillRecord spillRecord = new TezSpillRecord(indexFile, localFs); idxRecord = spillRecord.getIndex(partition); return idxRecord; } @@ -768,10 +781,10 @@ public Map getPathToAttemptMap() { static class HostFetchResult { private final FetchResult fetchResult; - private final InputAttemptIdentifier[] failedInputs; + private final InputAttemptFetchFailure[] failedInputs; private final boolean connectFailed; - public HostFetchResult(FetchResult fetchResult, InputAttemptIdentifier[] failedInputs, + public HostFetchResult(FetchResult fetchResult, InputAttemptFetchFailure[] failedInputs, boolean connectFailed) { this.fetchResult = fetchResult; this.failedInputs = failedInputs; @@ -829,8 +842,11 @@ public String toString() { return "id: " + srcAttemptId + ", decompressed length: " + decompressedLength + ", compressed length: " + compressedLength + ", reduce: " + forReduce; } } - private InputAttemptIdentifier[] fetchInputs(DataInputStream input, - CachingCallBack callback, InputAttemptIdentifier inputAttemptIdentifier) throws FetcherReadTimeoutException { + + @VisibleForTesting + InputAttemptFetchFailure[] fetchInputs(DataInputStream input, CachingCallBack callback, + InputAttemptIdentifier inputAttemptIdentifier) + throws FetcherReadTimeoutException { FetchedInput fetchedInput = null; InputAttemptIdentifier srcAttemptId = null; long decompressedLength = 0; @@ -854,9 +870,19 @@ private InputAttemptIdentifier[] fetchInputs(DataInputStream input, 
header.readFields(input); pathComponent = header.getMapId(); if (!pathComponent.startsWith(InputAttemptIdentifier.PATH_PREFIX)) { - throw new IllegalArgumentException("Invalid map id: " + header.getMapId() + ", expected to start with " + - InputAttemptIdentifier.PATH_PREFIX + ", partition: " + header.getPartition() - + " while fetching " + inputAttemptIdentifier); + if (pathComponent.startsWith(ShuffleHandlerError.DISK_ERROR_EXCEPTION.toString())) { + LOG.warn("Invalid map id: " + header.getMapId() + ", expected to start with " + + InputAttemptIdentifier.PATH_PREFIX + ", partition: " + header.getPartition() + + " while fetching " + inputAttemptIdentifier); + // this should be treated as local fetch failure while reporting later + return new InputAttemptFetchFailure[] { + InputAttemptFetchFailure.fromDiskErrorAtSource(inputAttemptIdentifier) }; + } else { + throw new IllegalArgumentException( + "Invalid map id: " + header.getMapId() + ", expected to start with " + + InputAttemptIdentifier.PATH_PREFIX + ", partition: " + header.getPartition() + + " while fetching " + inputAttemptIdentifier); + } } srcAttemptId = pathToAttemptMap.get(new PathPartition(pathComponent, header.getPartition())); @@ -881,7 +907,7 @@ private InputAttemptIdentifier[] fetchInputs(DataInputStream input, if (!isShutDown.get()) { LOG.warn("Invalid src id ", e); // Don't know which one was bad, so consider all of them as bad - return srcAttemptsRemaining.values().toArray(new InputAttemptIdentifier[srcAttemptsRemaining.size()]); + return InputAttemptFetchFailure.fromAttempts(srcAttemptsRemaining.values()); } else { if (isDebugEnabled) { LOG.debug("Already shutdown. Ignoring badId error with message: " + e.getMessage()); @@ -900,7 +926,8 @@ private InputAttemptIdentifier[] fetchInputs(DataInputStream input, srcAttemptId = getNextRemainingAttempt(); } assert (srcAttemptId != null); - return new InputAttemptIdentifier[]{srcAttemptId}; + return new InputAttemptFetchFailure[] { + InputAttemptFetchFailure.fromAttempt(srcAttemptId) }; } else { if (isDebugEnabled) { LOG.debug("Already shutdown. Ignoring verification failure."); @@ -980,7 +1007,7 @@ private InputAttemptIdentifier[] fetchInputs(DataInputStream input, // metrics.successFetch(); } srcAttemptsRemaining.remove(inputAttemptIdentifier.toString()); - } catch (IOException ioe) { + } catch (IOException | InternalError ioe) { if (isShutDown.get()) { cleanupFetchedInput(fetchedInput); if (isDebugEnabled) { @@ -1002,19 +1029,20 @@ private InputAttemptIdentifier[] fetchInputs(DataInputStream input, // Cleanup the fetchedInput before returning. 
cleanupFetchedInput(fetchedInput); if (srcAttemptId == null) { - return srcAttemptsRemaining.values() - .toArray(new InputAttemptIdentifier[srcAttemptsRemaining.size()]); + return InputAttemptFetchFailure.fromAttempts(srcAttemptsRemaining.values()); } else { - return new InputAttemptIdentifier[] { srcAttemptId }; + return new InputAttemptFetchFailure[] { + new InputAttemptFetchFailure(srcAttemptId) }; } } - LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host, - ioe); + LOG.warn("Failed to shuffle output of " + srcAttemptId + " from " + host + " (to " + + localHostname + ")", ioe); // Cleanup the fetchedInput cleanupFetchedInput(fetchedInput); // metrics.failedFetch(); - return new InputAttemptIdentifier[] { srcAttemptId }; + return new InputAttemptFetchFailure[] { + new InputAttemptFetchFailure(srcAttemptId) }; } return null; } @@ -1037,7 +1065,7 @@ private void cleanupFetchedInput(FetchedInput fetchedInput) { * @return true to indicate connection retry. false otherwise. * @throws IOException */ - private boolean shouldRetry(InputAttemptIdentifier srcAttemptId, IOException ioe) { + private boolean shouldRetry(InputAttemptIdentifier srcAttemptId, Throwable ioe) { if (!(ioe instanceof SocketTimeoutException)) { return false; } @@ -1047,9 +1075,9 @@ private boolean shouldRetry(InputAttemptIdentifier srcAttemptId, IOException ioe retryStartTime = currentTime; } - if (currentTime - retryStartTime < httpConnectionParams.getReadTimeout()) { + if ((currentTime - retryStartTime) - httpConnectionParams.getReadTimeout() < 0) { LOG.warn("Shuffle output from " + srcAttemptId + - " failed, retry it."); + " failed (to "+ localHostname +"), retry it."); //retry connecting to the host return true; } else { @@ -1062,7 +1090,7 @@ private boolean shouldRetry(InputAttemptIdentifier srcAttemptId, IOException ioe /** * Do some basic verification on the input received -- Being defensive - * + * * @param compressedLength * @param decompressedLength * @param fetchPartition @@ -1092,7 +1120,7 @@ private boolean verifySanity(long compressedLength, long decompressedLength, } return true; } - + private InputAttemptIdentifier getNextRemainingAttempt() { if (srcAttemptsRemaining.size() > 0) { return srcAttemptsRemaining.values().iterator().next(); @@ -1109,26 +1137,32 @@ public static class FetcherBuilder { private boolean workAssigned = false; public FetcherBuilder(FetcherCallback fetcherCallback, - HttpConnectionParams params, FetchedInputAllocator inputManager, - ApplicationId appId, int dagIdentifier, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed, - Configuration conf, boolean localDiskFetchEnabled, String localHostname, int shufflePort, - boolean asyncHttp, boolean verifyDiskChecksum, boolean compositeFetch) { - this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId, dagIdentifier, - jobTokenSecretMgr, srcNameTrimmed, conf, null, null, null, localDiskFetchEnabled, + HttpConnectionParams params, FetchedInputAllocator inputManager, InputContext inputContext, + JobTokenSecretManager jobTokenSecretMgr, Configuration conf, boolean localDiskFetchEnabled, + String localHostname, int shufflePort, boolean asyncHttp, boolean verifyDiskChecksum, boolean compositeFetch) { + this.fetcher = new Fetcher(fetcherCallback, params, inputManager, inputContext, + jobTokenSecretMgr, conf, null, null, null, localDiskFetchEnabled, false, localHostname, shufflePort, asyncHttp, verifyDiskChecksum, compositeFetch); } public FetcherBuilder(FetcherCallback fetcherCallback, - 
HttpConnectionParams params, FetchedInputAllocator inputManager, - ApplicationId appId, int dagIdentifier, JobTokenSecretManager jobTokenSecretMgr, String srcNameTrimmed, - Configuration conf, RawLocalFileSystem localFs, + HttpConnectionParams params, FetchedInputAllocator inputManager, InputContext inputContext, + JobTokenSecretManager jobTokenSecretMgr, Configuration conf, RawLocalFileSystem localFs, LocalDirAllocator localDirAllocator, Path lockPath, boolean localDiskFetchEnabled, boolean sharedFetchEnabled, - String localHostname, int shufflePort, boolean asyncHttp, boolean verifyDiskChecksum, boolean compositeFetch) { - this.fetcher = new Fetcher(fetcherCallback, params, inputManager, appId, dagIdentifier, - jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator, - lockPath, localDiskFetchEnabled, sharedFetchEnabled, localHostname, shufflePort, asyncHttp, - verifyDiskChecksum, compositeFetch); + String localHostname, int shufflePort, boolean asyncHttp, boolean verifyDiskChecksum, boolean compositeFetch, + boolean enableFetcherTestingErrors) { + if (enableFetcherTestingErrors) { + this.fetcher = new FetcherWithInjectableErrors(fetcherCallback, params, inputManager, inputContext, + jobTokenSecretMgr, conf, localFs, localDirAllocator, + lockPath, localDiskFetchEnabled, sharedFetchEnabled, localHostname, shufflePort, asyncHttp, + verifyDiskChecksum, compositeFetch); + } else { + this.fetcher = new Fetcher(fetcherCallback, params, inputManager, inputContext, + jobTokenSecretMgr, conf, localFs, localDirAllocator, + lockPath, localDiskFetchEnabled, sharedFetchEnabled, localHostname, shufflePort, asyncHttp, + verifyDiskChecksum, compositeFetch); + } } public FetcherBuilder setHttpConnectionParameters(HttpConnectionParams httpParams) { diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetcherCallback.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetcherCallback.java index 34bd272909..b751fb9ce0 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetcherCallback.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetcherCallback.java @@ -28,6 +28,7 @@ public void fetchSucceeded(String host, InputAttemptIdentifier srcAttemptIdentif FetchedInput fetchedInput, long fetchedBytes, long decompressedLength, long copyDuration) throws IOException; - public void fetchFailed(String host, InputAttemptIdentifier srcAttemptIdentifier, boolean connectFailed); + public void fetchFailed(String host, InputAttemptFetchFailure srcAttemptFetchFailure, + boolean connectFailed); } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetcherErrorTestingConfig.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetcherErrorTestingConfig.java new file mode 100644 index 0000000000..ce15a87fb9 --- /dev/null +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetcherErrorTestingConfig.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
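
One detail in the shouldRetry() hunk above deserves a note: the timeout test changed from a direct comparison to subtract-then-compare, presumably to use the overflow-safe ordering idiom for time deltas (the same one recommended for System.nanoTime() arithmetic). Illustration only, with the surrounding variables assumed:

    long elapsed = currentTime - retryStartTime; // a delta stays meaningful even if the clock value wrapped
    boolean retry = elapsed - readTimeout < 0;   // orders correctly near the overflow boundary,
                                                 // where a plain 'elapsed < readTimeout' may not
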
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.runtime.library.common.shuffle;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.tez.common.TezUtilsInternal;
+import org.apache.tez.runtime.api.ObjectRegistry;
+import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
+import org.apache.tez.runtime.library.common.InputAttemptIdentifier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class FetcherErrorTestingConfig {
+  private static final Logger LOG = LoggerFactory.getLogger(FetcherErrorTestingConfig.class);
+  private static final String KEY_CACHED_HOSTNAME = "FetcherErrorTestingConfig.host";
+
+  private String hostToFail = "*";
+  private String srcNameTrimmedToFail = "*";
+  private int probabilityPercent = 50;
+  private Random random = new Random();
+  /**
+   * Whether to fail only in case of input attempts with index 0;
+   * this prevents continuous failures and helps simulate a real-life node failure.
+   */
+  private boolean failForFirstAttemptOnly = false;
+  private ObjectRegistry objectRegistry;
+
+  public FetcherErrorTestingConfig(Configuration conf, ObjectRegistry objectRegistry) {
+    String errorConfig = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_TESTING_ERRORS_CONFIG,
+        TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_TESTING_ERRORS_CONFIG_DEFAULT);
+    String[] configParts = errorConfig.split("#");
+
+    // e.g. host_1
+    if (configParts.length > 0) {
+      hostToFail = configParts[0];
+    }
+
+    // e.g. Map 1 or Map_1, both will work
+    if (configParts.length > 1) {
+      srcNameTrimmedToFail = TezUtilsInternal.cleanVertexName(configParts[1]);
+    }
+
+    // e.g. 50
+    if (configParts.length > 2) {
+      probabilityPercent = Integer.parseInt(configParts[2]);
+    }
+
+    // e.g.
fail_only_first + if (configParts.length > 3) { + List features = Arrays.asList(configParts[3].split(",")); + if (features.contains("fail_only_first")) { + failForFirstAttemptOnly = true; + } + } + + this.objectRegistry = objectRegistry; + if (hostToFail.equals("_first_")) { + String host = (String) objectRegistry.get(KEY_CACHED_HOSTNAME); + if (host != null) { + LOG.info("Get already stored hostname for fetcher test failures: " + host); + hostToFail = host; + } + } + } + + public boolean shouldFail(String host, String srcNameTrimmed, InputAttemptIdentifier inputAttemptIdentifier) { + if (matchHost(host) && matchSourceVertex(srcNameTrimmed)) { + return (!failForFirstAttemptOnly || failForFirstAttemptOnly && inputAttemptIdentifier.getAttemptNumber() == 0) + && random.nextInt(100) < probabilityPercent; + } + return false; + } + + private boolean matchHost(String host) { + if (hostToFail.equals("_first_")) { + objectRegistry.cacheForVertex(KEY_CACHED_HOSTNAME, host); + hostToFail = host; + } + return "*".equals(hostToFail) || host.equalsIgnoreCase(hostToFail); + } + + private boolean matchSourceVertex(String srcNameTrimmed) { + return "*".equals(srcNameTrimmedToFail) || srcNameTrimmed.equalsIgnoreCase(srcNameTrimmedToFail); + } + + @Override + public String toString() { + return String.format( + "[FetcherErrorTestingConfig: host: %s, source vertex: %s, probability: %d%%, failForFirstAttemptOnly: %s]", + hostToFail, srcNameTrimmedToFail, probabilityPercent, failForFirstAttemptOnly); + } +} diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetcherWithInjectableErrors.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetcherWithInjectableErrors.java new file mode 100644 index 0000000000..cf53a57a9e --- /dev/null +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/FetcherWithInjectableErrors.java @@ -0,0 +1,87 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
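
Putting the parsing above together, the testing config is one '#'-separated string: host, source vertex, probability percent, and an optional feature list. A hypothetical setting (host and vertex names invented) that fails fetches from one node a quarter of the time, and only for attempt number 0:

    Configuration conf = new Configuration();
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_TESTING_ERRORS_CONFIG,
        "node-7#Map 1#25#fail_only_first");
    // "*" acts as a wildcard for the host or vertex part, and the special host
    // "_first_" latches onto the first host seen and keeps failing that one.
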
+ */ +package org.apache.tez.runtime.library.common.shuffle; + +import java.io.IOException; +import java.util.Collection; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalDirAllocator; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.tez.common.TezUtilsInternal; +import org.apache.tez.common.security.JobTokenSecretManager; +import org.apache.tez.http.HttpConnectionParams; +import org.apache.tez.runtime.api.InputContext; +import org.apache.tez.runtime.library.common.InputAttemptIdentifier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class FetcherWithInjectableErrors extends Fetcher { + private static final Logger LOG = LoggerFactory.getLogger(FetcherWithInjectableErrors.class); + + private FetcherErrorTestingConfig fetcherErrorTestingConfig; + private String srcNameTrimmed; + + protected FetcherWithInjectableErrors(FetcherCallback fetcherCallback, HttpConnectionParams params, + FetchedInputAllocator inputManager, InputContext inputContext, + JobTokenSecretManager jobTokenSecretManager, Configuration conf, + RawLocalFileSystem localFs, LocalDirAllocator localDirAllocator, Path lockPath, boolean localDiskFetchEnabled, + boolean sharedFetchEnabled, String localHostname, int shufflePort, boolean asyncHttp, boolean verifyDiskChecksum, + boolean compositeFetch) { + super(fetcherCallback, params, inputManager, inputContext, jobTokenSecretManager, conf, + localFs, localDirAllocator, lockPath, localDiskFetchEnabled, sharedFetchEnabled, localHostname, shufflePort, + asyncHttp, verifyDiskChecksum, compositeFetch); + this.fetcherErrorTestingConfig = new FetcherErrorTestingConfig(conf, inputContext.getObjectRegistry()); + this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName()); + LOG.info("Initialized FetcherWithInjectableErrors with config: {}", fetcherErrorTestingConfig); + } + + @Override + protected void setupConnectionInternal(String host, Collection attempts) + throws IOException, InterruptedException { + LOG.info("Checking if fetcher should fail for host: {} ...", host); + for (InputAttemptIdentifier inputAttemptIdentifier : attempts) { + if (fetcherErrorTestingConfig.shouldFail(host, srcNameTrimmed, inputAttemptIdentifier)) { + throw new IOException(String.format( + "FetcherWithInjectableErrors tester made failure for host: %s, input attempt: %s", host, + inputAttemptIdentifier.getAttemptNumber())); + } + } + super.setupConnectionInternal(host, attempts); + } + + @Override + public int hashCode() { + return fetcherIdentifier; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + FetcherWithInjectableErrors other = (FetcherWithInjectableErrors) obj; + if (fetcherIdentifier != other.fetcherIdentifier) { + return false; + } + return true; + } +} diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/InputAttemptFetchFailure.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/InputAttemptFetchFailure.java new file mode 100644 index 0000000000..4ce1699cf5 --- /dev/null +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/InputAttemptFetchFailure.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
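
The InputAttemptFetchFailure class that begins here gives each failure kind a named factory, so call sites stay declarative. A producer-side sketch (the attempt and the exception are placeholders); withCause() carries a Throwable along without affecting equals():

    InputAttemptFetchFailure plain = InputAttemptFetchFailure.fromAttempt(attempt);
    InputAttemptFetchFailure local = InputAttemptFetchFailure.fromLocalFetchFailure(attempt);
    InputAttemptFetchFailure atSource = InputAttemptFetchFailure.fromDiskErrorAtSource(attempt)
        .withCause(new IOException("disk error reported by shuffle handler"));
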
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.runtime.library.common.shuffle; + +import java.util.Arrays; +import java.util.Collection; + +import org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier; +import org.apache.tez.runtime.library.common.InputAttemptIdentifier; + +/** + * InputAttemptFetchFailure is supposed to wrap an InputAttemptIdentifier with any kind of failure + * information during fetch. It can be useful for propagating as a single object instead of multiple + * parameters (local fetch error, remote fetch error, connect failed, read failed, etc.). + */ +public class InputAttemptFetchFailure { + + private final InputAttemptIdentifier inputAttemptIdentifier; + private final boolean isLocalFetch; + private final boolean isDiskErrorAtSource; + private Throwable cause = null; + + public InputAttemptFetchFailure(InputAttemptIdentifier inputAttemptIdentifier) { + this(inputAttemptIdentifier, false, false); + } + + public InputAttemptFetchFailure(InputAttemptIdentifier inputAttemptIdentifier, + boolean isLocalFetch, boolean isDiskErrorAtSource) { + this.inputAttemptIdentifier = inputAttemptIdentifier; + this.isLocalFetch = isLocalFetch; + this.isDiskErrorAtSource = isDiskErrorAtSource; + } + + public InputAttemptIdentifier getInputAttemptIdentifier() { + return inputAttemptIdentifier; + } + + public boolean isLocalFetch() { + return isLocalFetch; + } + + public boolean isDiskErrorAtSource() { + return isDiskErrorAtSource; + } + + public static InputAttemptFetchFailure fromAttempt(InputAttemptIdentifier attempt) { + return new InputAttemptFetchFailure(attempt, false, false); + } + + public static InputAttemptFetchFailure fromLocalFetchFailure(InputAttemptIdentifier attempt) { + return new InputAttemptFetchFailure(attempt, true, false); + } + + public static InputAttemptFetchFailure fromDiskErrorAtSource(InputAttemptIdentifier attempt) { + return new InputAttemptFetchFailure(attempt, false, true); + } + + public static InputAttemptFetchFailure[] fromAttempts(Collection values) { + return values.stream().map(identifier -> new InputAttemptFetchFailure(identifier, false, false)) + .toArray(InputAttemptFetchFailure[]::new); + } + + public static InputAttemptFetchFailure[] fromAttempts(InputAttemptIdentifier[] values) { + return Arrays.asList(values).stream() + .map(identifier -> new InputAttemptFetchFailure(identifier, false, false)) + .toArray(InputAttemptFetchFailure[]::new); + } + + public static InputAttemptFetchFailure[] fromAttemptsLocalFetchFailure( + Collection values) { + return values.stream().map(identifier -> new InputAttemptFetchFailure(identifier, true, false)) + .toArray(InputAttemptFetchFailure[]::new); + } + + public static InputAttemptFetchFailure fromCompositeAttemptLocalFetchFailure( + CompositeInputAttemptIdentifier compositeInputAttemptIdentifier) { + return new 
InputAttemptFetchFailure(compositeInputAttemptIdentifier, true, false); + } + + @Override + public boolean equals(Object obj) { + if (obj == null || (obj.getClass() != this.getClass())) { + return false; + } + return inputAttemptIdentifier.equals(((InputAttemptFetchFailure) obj).inputAttemptIdentifier) + && isLocalFetch == ((InputAttemptFetchFailure) obj).isLocalFetch + && isDiskErrorAtSource == ((InputAttemptFetchFailure) obj).isDiskErrorAtSource; + } + + @Override + public int hashCode() { + return 31 * inputAttemptIdentifier.hashCode() + 31 * (isLocalFetch ? 0 : 1) + + 31 * (isDiskErrorAtSource ? 0 : 1); + } + + @Override + public String toString() { + return String.format("%s, isLocalFetch: %s, isDiskErrorAtSource: %s", + inputAttemptIdentifier.toString(), isLocalFetch, isDiskErrorAtSource); + } + + public InputAttemptFetchFailure withCause(Throwable throwable) { + this.cause = throwable; + return this; + } + + public Throwable getCause() { + return cause; + } +} diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/LocalDiskFetchedInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/LocalDiskFetchedInput.java index 5c6396156b..d7ec265048 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/LocalDiskFetchedInput.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/LocalDiskFetchedInput.java @@ -23,7 +23,7 @@ import java.io.OutputStream; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.apache.commons.io.input.BoundedInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,15 +39,27 @@ public class LocalDiskFetchedInput extends FetchedInput { private final Path inputFile; private final FileSystem localFS; private final long startOffset; + private final long size; - public LocalDiskFetchedInput(long startOffset, long actualSize, long compressedSize, + public LocalDiskFetchedInput(long startOffset, long compressedSize, InputAttemptIdentifier inputAttemptIdentifier, Path inputFile, Configuration conf, FetchedInputCallback callbackHandler) throws IOException { - super(Type.DISK_DIRECT, actualSize, compressedSize, inputAttemptIdentifier, callbackHandler); + super(inputAttemptIdentifier, callbackHandler); + this.size = compressedSize; this.startOffset = startOffset; this.inputFile = inputFile; - localFS = FileSystem.getLocal(conf); + localFS = FileSystem.getLocal(conf).getRaw(); + } + + @Override + public Type getType() { + return Type.DISK_DIRECT; + } + + @Override + public long getSize() { + return size; } @Override @@ -59,21 +71,21 @@ public OutputStream getOutputStream() throws IOException { public InputStream getInputStream() throws IOException { FSDataInputStream inputStream = localFS.open(inputFile); inputStream.seek(startOffset); - return new BoundedInputStream(inputStream, compressedSize); + return new BoundedInputStream(inputStream, getSize()); } @Override public void commit() { - if (state == State.PENDING) { - state = State.COMMITTED; + if (isState(State.PENDING)) { + setState(State.COMMITTED); notifyFetchComplete(); } } @Override public void abort() { - if (state == State.PENDING) { - state = State.ABORTED; + if (isState(State.PENDING)) { + setState(State.ABORTED); notifyFetchFailure(); } } @@ -81,10 +93,10 @@ public void abort() { @Override public void free() { Preconditions.checkState( - state == 
State.COMMITTED || state == State.ABORTED, + isState(State.COMMITTED) || isState(State.ABORTED), "FetchedInput can only be freed after it is committed or aborted"); - if (state == State.COMMITTED) { // ABORTED would have already called cleanup - state = State.FREED; + if (isState(State.COMMITTED)) { // ABORTED would have already called cleanup + setState(State.FREED); notifyFreedResource(); } } @@ -93,12 +105,11 @@ public void free() { public String toString() { return "LocalDiskFetchedInput [inputFile path =" + inputFile + ", offset" + startOffset + - ", actualSize=" + actualSize + - ", compressedSize=" + compressedSize + - ", inputAttemptIdentifier=" + inputAttemptIdentifier + - ", type=" + type + - ", id=" + id + - ", state=" + state + "]"; + ", compressedSize=" + getSize() + + ", inputAttemptIdentifier=" + getInputAttemptIdentifier() + + ", type=" + getType() + + ", id=" + getId() + + ", state=" + getState() + "]"; } @VisibleForTesting diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/MemoryFetchedInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/MemoryFetchedInput.java index 78f1f3b596..63aefa8d13 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/MemoryFetchedInput.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/MemoryFetchedInput.java @@ -25,45 +25,58 @@ import org.apache.tez.common.io.NonSyncByteArrayInputStream; import org.apache.tez.runtime.library.common.InputAttemptIdentifier; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; public class MemoryFetchedInput extends FetchedInput { - private BoundedByteArrayOutputStream byteStream; + private byte[] byteArray; - public MemoryFetchedInput(long actualSize, long compressedSize, + public MemoryFetchedInput(long actualSize, InputAttemptIdentifier inputAttemptIdentifier, FetchedInputCallback callbackHandler) { - super(Type.MEMORY, actualSize, compressedSize, inputAttemptIdentifier, callbackHandler); - this.byteStream = new BoundedByteArrayOutputStream((int) actualSize); + super(inputAttemptIdentifier, callbackHandler); + this.byteArray = new byte[(int) actualSize]; + } + + @Override + public Type getType() { + return Type.MEMORY; + } + + @Override + public long getSize() { + if (this.byteArray == null) { + return 0; + } + return this.byteArray.length; } @Override public OutputStream getOutputStream() { - return byteStream; + return new InMemoryBoundedByteArrayOutputStream(byteArray); } @Override public InputStream getInputStream() { - return new NonSyncByteArrayInputStream(byteStream.getBuffer()); + return new NonSyncByteArrayInputStream(byteArray); } public byte[] getBytes() { - return byteStream.getBuffer(); + return byteArray; } @Override public void commit() { - if (state == State.PENDING) { - state = State.COMMITTED; + if (isState(State.PENDING)) { + setState(State.COMMITTED); notifyFetchComplete(); } } @Override public void abort() { - if (state == State.PENDING) { - state = State.ABORTED; + if (isState(State.PENDING)) { + setState(State.ABORTED); notifyFetchFailure(); } } @@ -71,20 +84,28 @@ public void abort() { @Override public void free() { Preconditions.checkState( - state == State.COMMITTED || state == State.ABORTED, + isState(State.COMMITTED) || isState(State.ABORTED), "FetchedInput can only be freed after it is committed or aborted"); - if (state == State.COMMITTED) { // ABORTED would have already called cleanup - 
state = State.FREED; - this.byteStream = null; + if (isState(State.COMMITTED)) { // ABORTED would have already called cleanup + setState(State.FREED); notifyFreedResource(); + // Set this to null AFTER notifyFreedResource() so that getSize() + // returns the correct size + this.byteArray = null; } } @Override public String toString() { return "MemoryFetchedInput [inputAttemptIdentifier=" - + inputAttemptIdentifier + ", actualSize=" + actualSize - + ", compressedSize=" + compressedSize + ", type=" + type + ", id=" - + id + ", state=" + state + "]"; + + getInputAttemptIdentifier() + ", size=" + getSize() + + ", type=" + getType() + ", id=" + + getId() + ", state=" + getState() + "]"; + } + + private static class InMemoryBoundedByteArrayOutputStream extends BoundedByteArrayOutputStream { + InMemoryBoundedByteArrayOutputStream(byte[] array) { + super(array, 0, array.length); + } } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java index bf58172ef1..c5fb651167 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/ShuffleUtils.java @@ -29,13 +29,14 @@ import java.util.BitSet; import java.util.Collection; import java.util.List; +import java.util.Objects; import java.util.concurrent.atomic.AtomicLong; import java.util.zip.Deflater; import javax.annotation.Nullable; import javax.crypto.SecretKey; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.primitives.Ints; import com.google.protobuf.ByteString; @@ -71,7 +72,7 @@ import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto; import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DetailedPartitionStatsProto; -public class ShuffleUtils { +public final class ShuffleUtils { private static final Logger LOG = LoggerFactory.getLogger(ShuffleUtils.class); private static final long MB = 1024l * 1024l; @@ -93,6 +94,8 @@ protected FastNumberFormat initialValue() { } }; + private ShuffleUtils() {} + public static SecretKey getJobTokenSecretFromTokenBytes(ByteBuffer meta) throws IOException { DataInputByteBuffer in = new DataInputByteBuffer(); @@ -121,11 +124,8 @@ public static void shuffleToMemory(byte[] shuffleData, IFile.Reader.readToMemory(shuffleData, input, compressedLength, codec, ifileReadAhead, ifileReadAheadLength); // metrics.inputBytes(shuffleData.length); - if (LOG.isDebugEnabled()) { - LOG.debug("Read " + shuffleData.length + " bytes from input for " - + identifier); - } - } catch (InternalError | IOException e) { + LOG.debug("Read {} bytes from input for {}", shuffleData.length, identifier); + } catch (InternalError | Exception e) { // Close the streams LOG.info("Failed to read data to memory for " + identifier + ". len=" + compressedLength + ", decomp=" + decompressedLength + ". ExceptionMessage=" + e.getMessage()); @@ -135,9 +135,12 @@ public static void shuffleToMemory(byte[] shuffleData, // on decompression failures. Catching and re-throwing as IOException // to allow fetch failure logic to be processed. 
throw new IOException(e); + } else if (e instanceof IOException) { + throw e; + } else { + // Re-throw as an IOException + throw new IOException(e); } - // Re-throw - throw e; } } @@ -197,8 +200,7 @@ public static void ioCleanup(Closeable... closeables) { try { c.close(); } catch (IOException e) { - if (LOG.isDebugEnabled()) - LOG.debug("Exception in closing " + c, e); + LOG.debug("Exception in closing {}", c, e); } } } @@ -260,7 +262,8 @@ public static String stringify(DataMovementEventPayloadProto dmProto) { sb.append("host: " + dmProto.getHost()).append(", "); sb.append("port: " + dmProto.getPort()).append(", "); sb.append("pathComponent: " + dmProto.getPathComponent()).append(", "); - sb.append("runDuration: " + dmProto.getRunDuration()); + sb.append("runDuration: " + dmProto.getRunDuration()).append(", "); + sb.append("hasDataInEvent: " + dmProto.hasData()); sb.append("]"); return sb.toString(); } @@ -407,7 +410,7 @@ public static void generateEventOnSpill(List eventList, boolean finalMerg int numPhysicalOutputs, boolean sendEmptyPartitionDetails, String pathComponent, @Nullable long[] partitionStats, boolean reportDetailedPartitionStats, String auxiliaryService, Deflater deflater) throws IOException { - Preconditions.checkArgument(eventList != null, "EventList can't be null"); + Objects.requireNonNull(eventList, "EventList can't be null"); context.notifyProgress(); if (finalMergeEnabled) { @@ -563,15 +566,14 @@ public void logIndividualFetchComplete(long millis, long bytesCompressed, if (activeLogger.isInfoEnabled()) { long wholeMBs = 0; long partialMBs = 0; - if (millis != 0) { - // fast math is done using integer math to avoid double to string conversion - // calculate B/s * 100 to preserve MBs precision to two decimal places - // multiply numerator by 100000 (2^5 * 5^5) and divide denominator by MB (2^20) - // simply fraction to protect ourselves from overflow by factoring out 2^5 - wholeMBs = (bytesCompressed * 3125) / (millis * 32768); - partialMBs = wholeMBs % 100; - wholeMBs /= 100; - } + millis = Math.max(1L, millis); + // fast math is done using integer math to avoid double to string conversion + // calculate B/s * 100 to preserve MBs precision to two decimal places + // multiply numerator by 100000 (2^5 * 5^5) and divide denominator by MB (2^20) + // simply fraction to protect ourselves from overflow by factoring out 2^5 + wholeMBs = (bytesCompressed * 3125) / (millis * 32768); + partialMBs = wholeMBs % 100; + wholeMBs /= 100; StringBuilder sb = new StringBuilder("Completed fetch for attempt: "); toShortString(srcAttemptIdentifier, sb); sb.append(" to "); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleInputEventHandlerImpl.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleInputEventHandlerImpl.java index 542ec34338..56b8cd4a08 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleInputEventHandlerImpl.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleInputEventHandlerImpl.java @@ -43,7 +43,11 @@ import org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator; import org.apache.tez.runtime.library.common.shuffle.ShuffleEventHandler; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; +import org.apache.tez.runtime.library.common.shuffle.FetchedInput; +import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataProto; import 
org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto; +import org.apache.tez.runtime.library.common.shuffle.DiskFetchedInput; +import org.apache.tez.runtime.library.common.shuffle.MemoryFetchedInput; import com.google.protobuf.InvalidProtocolBufferException; @@ -157,7 +161,7 @@ private void handleEvent(Event event) throws IOException { @Override public void logProgress(boolean updateOnClose) { - LOG.info(inputContext.getSourceVertexName() + ": " + LOG.info(inputContext.getInputOutputVertexNames() + ": " + "numDmeEventsSeen=" + numDmeEvents.get() + ", numDmeEventsSeenWithNoData=" + numDmeEventsNoData.get() + ", numObsoletionEventsSeen=" + numObsoletionEvents.get() @@ -166,6 +170,7 @@ public void logProgress(boolean updateOnClose) { private void processDataMovementEvent(DataMovementEvent dme, DataMovementEventPayloadProto shufflePayload, BitSet emptyPartitionsBitSet) throws IOException { int srcIndex = dme.getSourceIndex(); + if (LOG.isDebugEnabled()) { LOG.debug("DME srcIdx: " + srcIndex + ", targetIndex: " + dme.getTargetIndex() + ", attemptNum: " + dme.getVersion() + ", payload: " + ShuffleUtils @@ -176,20 +181,49 @@ private void processDataMovementEvent(DataMovementEvent dme, DataMovementEventPa if (emptyPartitionsBitSet.get(srcIndex)) { CompositeInputAttemptIdentifier srcAttemptIdentifier = constructInputAttemptIdentifier(dme.getTargetIndex(), 1, dme.getVersion(), shufflePayload, false); - if (LOG.isDebugEnabled()) { - LOG.debug("Source partition: " + srcIndex + " did not generate any data. SrcAttempt: [" - + srcAttemptIdentifier + "]. Not fetching."); - } + LOG.debug("Source partition: {} did not generate any data. SrcAttempt: [{}]. Not fetching.", + srcIndex, srcAttemptIdentifier); numDmeEventsNoData.getAndIncrement(); shuffleManager.addCompletedInputWithNoData(srcAttemptIdentifier.expand(0)); return; + } else { + shuffleManager.updateApproximateInputRecords(shufflePayload.getNumRecord()); } + } else { + shuffleManager.updateApproximateInputRecords(shufflePayload.getNumRecord()); } CompositeInputAttemptIdentifier srcAttemptIdentifier = constructInputAttemptIdentifier(dme.getTargetIndex(), 1, dme.getVersion(), shufflePayload, (useSharedInputs && srcIndex == 0)); - shuffleManager.addKnownInput(shufflePayload.getHost(), shufflePayload.getPort(), srcAttemptIdentifier, srcIndex); + processShufflePayload(shufflePayload, srcAttemptIdentifier, srcIndex); + } + + private void moveDataToFetchedInput(DataProto dataProto, + FetchedInput fetchedInput, String hostIdentifier) throws IOException { + switch (fetchedInput.getType()) { + case DISK: + ShuffleUtils + .shuffleToDisk(((DiskFetchedInput) fetchedInput).getOutputStream(), + hostIdentifier, dataProto.getData().newInput(), + dataProto.getCompressedLength(), + dataProto.getUncompressedLength(), LOG, + fetchedInput.getInputAttemptIdentifier(), ifileReadAhead, + ifileReadAheadLength, true); + break; + case MEMORY: + ShuffleUtils + .shuffleToMemory(((MemoryFetchedInput) fetchedInput).getBytes(), + dataProto.getData().newInput(), dataProto.getRawLength(), + dataProto.getCompressedLength(), + codec, ifileReadAhead, ifileReadAheadLength, LOG, + fetchedInput.getInputAttemptIdentifier()); + break; + case WAIT: + default: + throw new TezUncheckedException("Unexpected type: " + + fetchedInput.getType()); + } } private void processCompositeRoutedDataMovementEvent(CompositeRoutedDataMovementEvent crdme, DataMovementEventPayloadProto shufflePayload, BitSet emptyPartitionsBitSet) throws IOException { @@ -210,10 +244,8 
@@ private void processCompositeRoutedDataMovementEvent(CompositeRoutedDataMovement allPartitionsEmpty &= emptyPartitionsBitSet.get(srcPartitionId); if (emptyPartitionsBitSet.get(srcPartitionId)) { InputAttemptIdentifier srcAttemptIdentifier = compositeInputAttemptIdentifier.expand(i); - if (LOG.isDebugEnabled()) { - LOG.debug("Source partition: " + srcPartitionId + " did not generate any data. SrcAttempt: [" - + srcAttemptIdentifier + "]. Not fetching."); - } + LOG.debug("Source partition: {} did not generate any data. SrcAttempt: [{}]. Not fetching.", + srcPartitionId, srcAttemptIdentifier); numDmeEventsNoData.getAndIncrement(); shuffleManager.addCompletedInputWithNoData(srcAttemptIdentifier); } @@ -227,11 +259,30 @@ private void processCompositeRoutedDataMovementEvent(CompositeRoutedDataMovement CompositeInputAttemptIdentifier srcAttemptIdentifier = constructInputAttemptIdentifier(crdme.getTargetIndex(), crdme.getCount(), crdme.getVersion(), shufflePayload, (useSharedInputs && partitionId == 0)); - shuffleManager.addKnownInput(shufflePayload.getHost(), shufflePayload.getPort(), srcAttemptIdentifier, partitionId); + processShufflePayload(shufflePayload, srcAttemptIdentifier, partitionId); + } + + private void processShufflePayload(DataMovementEventPayloadProto shufflePayload, + CompositeInputAttemptIdentifier srcAttemptIdentifier, int srcIndex) throws IOException { + if (shufflePayload.hasData()) { + DataProto dataProto = shufflePayload.getData(); + String hostIdentifier = shufflePayload.getHost() + ":" + shufflePayload.getPort(); + FetchedInput fetchedInput = + inputAllocator.allocate(dataProto.getRawLength(), + dataProto.getCompressedLength(), srcAttemptIdentifier); + moveDataToFetchedInput(dataProto, fetchedInput, hostIdentifier); + shuffleManager.addCompletedInputWithData(srcAttemptIdentifier, fetchedInput); + + LOG.debug("Payload via DME : " + srcAttemptIdentifier); + } else { + shuffleManager.addKnownInput(shufflePayload.getHost(), shufflePayload.getPort(), + srcAttemptIdentifier, srcIndex); + } } private void processInputFailedEvent(InputFailedEvent ife) { InputAttemptIdentifier srcAttemptIdentifier = new InputAttemptIdentifier(ife.getTargetIndex(), ife.getVersion()); + LOG.info("Marking obsolete input: {} {}", inputContext.getSourceVertexName(), srcAttemptIdentifier); shuffleManager.obsoleteKnownInput(srcAttemptIdentifier); } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java index e1b7f99008..646194c6d7 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/ShuffleManager.java @@ -26,6 +26,7 @@ import java.util.Arrays; import java.util.BitSet; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -35,8 +36,10 @@ import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Condition; @@ -45,6 +48,8 @@ import 
javax.crypto.SecretKey; import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.MonotonicClock; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.http.HttpConnectionParams; import org.apache.tez.runtime.api.TaskFailureType; @@ -58,6 +63,7 @@ import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.tez.common.CallableWithNdc; +import org.apache.tez.common.GuavaShim; import org.apache.tez.common.TezRuntimeFrameworkConfigs; import org.apache.tez.common.TezUtilsInternal; import org.apache.tez.common.counters.TaskCounter; @@ -78,13 +84,14 @@ import org.apache.tez.runtime.library.common.shuffle.Fetcher.FetcherBuilder; import org.apache.tez.runtime.library.common.shuffle.FetcherCallback; import org.apache.tez.runtime.library.common.shuffle.HostPort; +import org.apache.tez.runtime.library.common.shuffle.InputAttemptFetchFailure; import org.apache.tez.runtime.library.common.shuffle.InputHost; import org.apache.tez.runtime.library.common.shuffle.InputHost.PartitionToInputs; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils.FetchStatsLogger; import com.google.common.base.Objects; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.util.concurrent.FutureCallback; @@ -113,9 +120,30 @@ public class ShuffleManager implements FetcherCallback { @VisibleForTesting final ListeningExecutorService fetcherExecutor; + /** + * Executor for ReporterCallable. + */ + private ExecutorService reporterExecutor; + + /** + * Lock to sync failedEvents. + */ + private final ReentrantLock reportLock = new ReentrantLock(); + + /** + * Condition to wake up the thread notifying when events fail. + */ + private final Condition reportCondition = reportLock.newCondition(); + + /** + * Events reporting fetcher failed. + */ + private final HashMap<InputReadErrorEvent, Integer> failedEvents + = new HashMap<>(); + private final ListeningExecutorService schedulerExecutor; private final RunShuffleCallable schedulerCallable; - + private final BlockingQueue<FetchedInput> completedInputs; private final AtomicBoolean inputReadyNotificationSent = new AtomicBoolean(false); @VisibleForTesting @@ -146,17 +174,26 @@ public class ShuffleManager implements FetcherCallback { private final boolean sharedFetchEnabled; private final boolean verifyDiskChecksum; private final boolean compositeFetch; - + private final boolean enableFetcherTestingErrors; + private final int ifileBufferSize; private final boolean ifileReadAhead; private final int ifileReadAheadLength; + + /** + * Holds the time to wait for failures to batch them and send fewer events.
+ */ + private final int maxTimeToWaitForReportMillis; - private final String srcNameTrimmed; + private final String sourceDestNameTrimmed; private final int maxTaskOutputAtOnce; private final AtomicBoolean isShutdown = new AtomicBoolean(false); + private long inputRecordsFromEvents; + private long eventsReceived; + private final TezCounter approximateInputRecords; private final TezCounter shuffledInputsCounter; private final TezCounter failedShufflesCounter; private final TezCounter bytesShuffledCounter; @@ -190,7 +227,8 @@ public ShuffleManager(InputContext inputContext, Configuration conf, int numInpu CompressionCodec codec, FetchedInputAllocator inputAllocator) throws IOException { this.inputContext = inputContext; this.numInputs = numInputs; - + + this.approximateInputRecords = inputContext.getCounters().findCounter(TaskCounter.APPROXIMATE_INPUT_RECORDS); this.shuffledInputsCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_SHUFFLED_INPUTS); this.failedShufflesCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_FAILED_SHUFFLE_INPUTS); this.bytesShuffledCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES); @@ -198,7 +236,8 @@ public ShuffleManager(InputContext inputContext, Configuration conf, int numInpu this.bytesShuffledToDiskCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_DISK); this.bytesShuffledToMemCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_TO_MEM); this.bytesShuffledDirectDiskCounter = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_BYTES_DISK_DIRECT); - + + this.ifileBufferSize = bufferSize; this.ifileReadAhead = ifileReadAheadEnabled; this.ifileReadAheadLength = ifileReadAheadLength; @@ -211,14 +250,23 @@ public ShuffleManager(InputContext inputContext, Configuration conf, int numInpu this.verifyDiskChecksum = conf.getBoolean( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_VERIFY_DISK_CHECKSUM, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_VERIFY_DISK_CHECKSUM_DEFAULT); + this.maxTimeToWaitForReportMillis = conf.getInt( + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_BATCH_WAIT, + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_BATCH_WAIT_DEFAULT); + this.shufflePhaseTime = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_PHASE_TIME); this.firstEventReceived = inputContext.getCounters().findCounter(TaskCounter.FIRST_EVENT_RECEIVED); this.lastEventReceived = inputContext.getCounters().findCounter(TaskCounter.LAST_EVENT_RECEIVED); this.compositeFetch = ShuffleUtils.isTezShuffleHandler(conf); - this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName()); - + this.enableFetcherTestingErrors = + conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_ENABLE_TESTING_ERRORS, + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_ENABLE_TESTING_ERRORS_DEFAULT); + + this.sourceDestNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName()) + " -> " + + TezUtilsInternal.cleanVertexName(inputContext.getTaskVertexName()); + completedInputSet = new BitSet(numInputs); /** * In case of pipelined shuffle, it is possible to get multiple FetchedInput per attempt. 
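The report fields declared above (reportLock, reportCondition, failedEvents and maxTimeToWaitForReportMillis) implement a small producer/consumer batcher: fetch failures are merged into a map under the lock, and a reporter thread drains the map at most once per wait window. A self-contained sketch of that protocol with simplified types (the real map is keyed by InputReadErrorEvent, and ShuffleManager additionally checks its shutdown flag in the loop):

    import java.util.HashMap;
    import java.util.Map;
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.locks.Condition;
    import java.util.concurrent.locks.ReentrantLock;

    class FailureBatcher {
      private final ReentrantLock lock = new ReentrantLock();
      private final Condition nonEmpty = lock.newCondition();
      private final Map<String, Integer> failures = new HashMap<>();
      private final long windowMillis;

      FailureBatcher(long windowMillis) { this.windowMillis = windowMillis; }

      // Producer: count the failure and wake the reporter thread.
      void report(String failure) {
        lock.lock();
        try {
          failures.merge(failure, 1, Integer::sum);
          nonEmpty.signal();
        } finally {
          lock.unlock();
        }
      }

      // Consumer: wait until something failed, then flush one batch.
      void drainOnce() throws InterruptedException {
        lock.lock();
        try {
          while (failures.isEmpty()) {
            nonEmpty.await(windowMillis, TimeUnit.MILLISECONDS);
          }
          failures.forEach((f, count) -> System.out.println(f + " x" + count));
          failures.clear();
        } finally {
          lock.unlock();
        }
      }
    }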
@@ -241,15 +289,15 @@ public ShuffleManager(InputContext inputContext, Configuration conf, int numInpu if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCHER_USE_SHARED_POOL, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCHER_USE_SHARED_POOL_DEFAULT)) { fetcherRawExecutor = inputContext.createTezFrameworkExecutorService(numFetchers, - "Fetcher_B {" + srcNameTrimmed + "} #%d"); + "Fetcher_B {" + sourceDestNameTrimmed + "} #%d"); } else { fetcherRawExecutor = Executors.newFixedThreadPool(numFetchers, new ThreadFactoryBuilder() - .setDaemon(true).setNameFormat("Fetcher_B {" + srcNameTrimmed + "} #%d").build()); + .setDaemon(true).setNameFormat("Fetcher_B {" + sourceDestNameTrimmed + "} #%d").build()); } this.fetcherExecutor = MoreExecutors.listeningDecorator(fetcherRawExecutor); ExecutorService schedulerRawExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder() - .setDaemon(true).setNameFormat("ShuffleRunner {" + srcNameTrimmed + "}").build()); + .setDaemon(true).setNameFormat("ShuffleRunner {" + sourceDestNameTrimmed + "}").build()); this.schedulerExecutor = MoreExecutors.listeningDecorator(schedulerRawExecutor); this.schedulerCallable = new RunShuffleCallable(conf); @@ -289,24 +337,85 @@ public ShuffleManager(InputContext inputContext, Configuration conf, int numInpu shuffleInfoEventsMap = new ConcurrentHashMap(); - LOG.info(srcNameTrimmed + ": numInputs=" + numInputs + ", compressionCodec=" + LOG.info(sourceDestNameTrimmed + ": numInputs=" + numInputs + ", compressionCodec=" + (codec == null ? "NoCompressionCodec" : codec.getClass().getName()) + ", numFetchers=" + numFetchers + ", ifileBufferSize=" + ifileBufferSize + ", ifileReadAheadEnabled=" + ifileReadAhead + ", ifileReadAheadLength=" + ifileReadAheadLength +", " + "localDiskFetchEnabled=" + localDiskFetchEnabled + ", " + "sharedFetchEnabled=" + sharedFetchEnabled + ", " - + httpConnectionParams.toString() + ", maxTaskOutputAtOnce=" + maxTaskOutputAtOnce); + + httpConnectionParams.toString() + ", maxTaskOutputAtOnce=" + maxTaskOutputAtOnce + + ", asyncHttp=" + asyncHttp); + } + + public void updateApproximateInputRecords(int delta) { + if (delta <= 0) { + return; + } + inputRecordsFromEvents += delta; + eventsReceived++; + approximateInputRecords.setValue((inputRecordsFromEvents / eventsReceived) * numInputs); } public void run() throws IOException { Preconditions.checkState(inputManager != null, "InputManager must be configured"); + if (maxTimeToWaitForReportMillis > 0) { + reporterExecutor = Executors.newSingleThreadExecutor( + new ThreadFactoryBuilder().setDaemon(true) + .setNameFormat("ShuffleRunner {" + sourceDestNameTrimmed + "}") + .build()); + Future reporterFuture = reporterExecutor.submit(new ReporterCallable()); + } + ListenableFuture runShuffleFuture = schedulerExecutor.submit(schedulerCallable); - Futures.addCallback(runShuffleFuture, new SchedulerFutureCallback()); + Futures.addCallback(runShuffleFuture, new SchedulerFutureCallback(), GuavaShim.directExecutor()); // Shutdown this executor once this task, and the callback complete. schedulerExecutor.shutdown(); } - + + private class ReporterCallable extends CallableWithNdc { + /** + * Measures if the batching interval has ended. 
+ */ + private final Clock clock; + ReporterCallable() { + clock = new MonotonicClock(); + } + + @Override + protected Void callInternal() throws Exception { + long nextReport = 0; + while (!isShutdown.get()) { + try { + reportLock.lock(); + while (failedEvents.isEmpty()) { + boolean signaled = reportCondition.await(maxTimeToWaitForReportMillis, + TimeUnit.MILLISECONDS); + } + + long currentTime = clock.getTime(); + if (currentTime > nextReport) { + if (failedEvents.size() > 0) { + List failedEventsToSend = Lists.newArrayListWithCapacity( + failedEvents.size()); + for (InputReadErrorEvent key : failedEvents.keySet()) { + failedEventsToSend.add(InputReadErrorEvent.create(key.getDiagnostics(), + key.getIndex(), key.getVersion(), failedEvents.get(key), key.isLocalFetch(), + key.isDiskErrorAtSource(), localhostName)); + } + inputContext.sendEvents(failedEventsToSend); + failedEvents.clear(); + nextReport = currentTime + maxTimeToWaitForReportMillis; + } + } + } finally { + reportLock.unlock(); + } + } + return null; + } + } + private class RunShuffleCallable extends CallableWithNdc { private final Configuration conf; @@ -320,9 +429,12 @@ protected Void callInternal() throws Exception { while (!isShutdown.get() && numCompletedInputs.get() < numInputs) { lock.lock(); try { - if (runningFetchers.size() >= numFetchers || pendingHosts.isEmpty()) { - if (numCompletedInputs.get() < numInputs) { - wakeLoop.await(); + while ((runningFetchers.size() >= numFetchers || pendingHosts.isEmpty()) + && numCompletedInputs.get() < numInputs) { + inputContext.notifyProgress(); + boolean ret = wakeLoop.await(1000, TimeUnit.MILLISECONDS); + if (isShutdown.get()) { + break; } } } finally { @@ -335,9 +447,7 @@ protected Void callInternal() throws Exception { break; } - if (LOG.isDebugEnabled()) { - LOG.debug(srcNameTrimmed + ": " + "NumCompletedInputs: " + numCompletedInputs); - } + LOG.debug("{}: NumCompletedInputs: {}", sourceDestNameTrimmed, numCompletedInputs); if (numCompletedInputs.get() < numInputs && !isShutdown.get()) { lock.lock(); try { @@ -349,7 +459,8 @@ protected Void callInternal() throws Exception { inputHost = pendingHosts.take(); } catch (InterruptedException e) { if (isShutdown.get()) { - LOG.info(srcNameTrimmed + ": " + "Interrupted and hasBeenShutdown, Breaking out of ShuffleScheduler Loop"); + LOG.info(sourceDestNameTrimmed + ": " + + "Interrupted and hasBeenShutdown, Breaking out of ShuffleScheduler Loop"); Thread.currentThread().interrupt(); break; } else { @@ -357,26 +468,26 @@ protected Void callInternal() throws Exception { } } if (LOG.isDebugEnabled()) { - LOG.debug(srcNameTrimmed + ": " + "Processing pending host: " + + LOG.debug(sourceDestNameTrimmed + ": " + "Processing pending host: " + inputHost.toDetailedString()); } if (inputHost.getNumPendingPartitions() > 0 && !isShutdown.get()) { Fetcher fetcher = constructFetcherForHost(inputHost, conf); runningFetchers.add(fetcher); if (isShutdown.get()) { - LOG.info(srcNameTrimmed + ": " + "hasBeenShutdown," + + LOG.info(sourceDestNameTrimmed + ": " + "hasBeenShutdown," + "Breaking out of ShuffleScheduler Loop"); break; } ListenableFuture future = fetcherExecutor .submit(fetcher); - Futures.addCallback(future, new FetchFutureCallback(fetcher)); + Futures.addCallback(future, new FetchFutureCallback(fetcher), GuavaShim.directExecutor()); if (++count >= maxFetchersToRun) { break; } } else { if (LOG.isDebugEnabled()) { - LOG.debug(srcNameTrimmed + ": " + "Skipping host: " + + LOG.debug(sourceDestNameTrimmed + ": " + "Skipping host: " + 
inputHost.getIdentifier() + " since it has no inputs to process"); } @@ -388,7 +499,8 @@ protected Void callInternal() throws Exception { } } shufflePhaseTime.setValue(System.currentTimeMillis() - startTime); - LOG.info(srcNameTrimmed + ": " + "Shutting down FetchScheduler, Was Interrupted: " + Thread.currentThread().isInterrupted()); + LOG.info(sourceDestNameTrimmed + ": " + "Shutting down FetchScheduler, Was Interrupted: " + + Thread.currentThread().isInterrupted()); if (!fetcherExecutor.isShutdown()) { fetcherExecutor.shutdownNow(); } @@ -431,15 +543,15 @@ Fetcher constructFetcherForHost(InputHost inputHost, Configuration conf) { if (sharedFetchEnabled) { // pick a single lock disk from the edge name's hashcode + host hashcode - final int h = Math.abs(Objects.hashCode(this.srcNameTrimmed, inputHost.getHost())); + final int h = Math.abs(Objects.hashCode(this.sourceDestNameTrimmed, inputHost.getHost())); lockDisk = new Path(this.localDisks[h % this.localDisks.length], "locks"); } FetcherBuilder fetcherBuilder = new FetcherBuilder(ShuffleManager.this, - httpConnectionParams, inputManager, inputContext.getApplicationId(), inputContext.getDagIdentifier(), - jobTokenSecretMgr, srcNameTrimmed, conf, localFs, localDirAllocator, + httpConnectionParams, inputManager, inputContext, + jobTokenSecretMgr, conf, localFs, localDirAllocator, lockDisk, localDiskFetchEnabled, sharedFetchEnabled, - localhostName, shufflePort, asyncHttp, verifyDiskChecksum, compositeFetch); + localhostName, shufflePort, asyncHttp, verifyDiskChecksum, compositeFetch, enableFetcherTestingErrors); if (codec != null) { fetcherBuilder.setCompressionParameters(codec); @@ -471,8 +583,9 @@ Fetcher constructFetcherForHost(InputHost inputHost, Configuration conf) { } else { alreadyCompleted = completedInputSet.get(input.getInputIdentifier()); } + // Avoid adding attempts which have already completed or have been marked as OBSOLETE - if (alreadyCompleted || obsoletedInputs.contains(input)) { + if (alreadyCompleted || isObsoleteInputAttemptIdentifier(input)) { inputIter.remove(); continue; } @@ -522,7 +635,7 @@ public void addKnownInput(String hostName, int port, } } if (LOG.isDebugEnabled()) { - LOG.debug(srcNameTrimmed + ": " + "Adding input: " + + LOG.debug(sourceDestNameTrimmed + ": " + "Adding input: " + srcAttemptIdentifier + ", to host: " + host); } @@ -555,9 +668,7 @@ public void addKnownInput(String hostName, int port, public void addCompletedInputWithNoData( InputAttemptIdentifier srcAttemptIdentifier) { int inputIdentifier = srcAttemptIdentifier.getInputIdentifier(); - if (LOG.isDebugEnabled()) { - LOG.debug("No input data exists for SrcTask: " + inputIdentifier + ". Marking as complete."); - } + LOG.debug("No input data exists for SrcTask: {}. 
Marking as complete.", inputIdentifier); lock.lock(); try { if (!completedInputSet.get(inputIdentifier)) { @@ -575,6 +686,52 @@ public void addCompletedInputWithNoData( } } + public void addCompletedInputWithData( + InputAttemptIdentifier srcAttemptIdentifier, FetchedInput fetchedInput) + throws IOException { + //InputIdentifier inputIdentifier = srcAttemptIdentifier.getInputIdentifier(); + int inputIdentifier = srcAttemptIdentifier.getInputIdentifier(); + if (LOG.isDebugEnabled()) { + LOG.debug("Received Data via Event: " + srcAttemptIdentifier + " to " + + fetchedInput.getType()); + } + // Count irrespective of whether this is a copy of an already fetched input + lock.lock(); + try { + lastProgressTime = System.currentTimeMillis(); + } finally { + lock.unlock(); + } + + boolean committed = false; + if (!completedInputSet.get(inputIdentifier)) { + synchronized (completedInputSet) { + if (!completedInputSet.get(inputIdentifier)) { + fetchedInput.commit(); + committed = true; + if (!srcAttemptIdentifier.canRetrieveInputInChunks()) { + registerCompletedInput(fetchedInput); + } else { + registerCompletedInputForPipelinedShuffle(srcAttemptIdentifier, + fetchedInput); + } + } + } + } + if (!committed) { + fetchedInput.abort(); // If this fails, the fetcher may attempt another + // abort. + } else { + lock.lock(); + try { + // Signal the wakeLoop to check for termination. + wakeLoop.signal(); + } finally { + lock.unlock(); + } + } + } + protected synchronized void updateEventReceivedTime() { long relativeTime = System.currentTimeMillis() - startTime; if (firstEventReceived.getValue() == 0) { @@ -701,7 +858,9 @@ private void registerCompletedInput(FetchedInput fetchedInput) { private void maybeInformInputReady(FetchedInput fetchedInput) { lock.lock(); try { - completedInputs.add(fetchedInput); + if (!(fetchedInput instanceof NullFetchedInput)) { + completedInputs.add(fetchedInput); + } if (!inputReadyNotificationSent.getAndSet(true)) { // TODO Should eventually be controlled by Inputs which are processing the data. inputContext.inputIsReady(); @@ -718,7 +877,11 @@ private void adjustCompletedInputs(FetchedInput fetchedInput) { int numComplete = numCompletedInputs.incrementAndGet(); if (numComplete == numInputs) { - LOG.info("All inputs fetched for input vertex : " + inputContext.getSourceVertexName()); + // Poison pill End of Input message to awake blocking take call + if (fetchedInput instanceof NullFetchedInput) { + completedInputs.add(fetchedInput); + } + LOG.info("All inputs fetched for input vertex : " + inputContext.getInputOutputVertexNames()); } } finally { lock.unlock(); @@ -783,42 +946,76 @@ private void reportFatalError(Throwable exception, String message) { @Override public void fetchFailed(String host, - InputAttemptIdentifier srcAttemptIdentifier, boolean connectFailed) { + InputAttemptFetchFailure inputAttemptFetchFailure, boolean connectFailed) { // TODO NEWTEZ. Implement logic to report fetch failures after a threshold. // For now, reporting immediately. 
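When batching is enabled (maxTimeToWaitForReportMillis > 0), the code below no longer sends one event per failure: repeated failures of the same attempt are coalesced into a count with Map.merge, which presumes the event type has value-based equals/hashCode. A tiny illustration of the counting semantics, with hypothetical String keys standing in for InputReadErrorEvent:

    import java.util.HashMap;
    import java.util.Map;

    class MergeCountDemo {
      public static void main(String[] args) {
        Map<String, Integer> failed = new HashMap<>();
        failed.merge("srcAttempt_3", 1, (a, b) -> a + b); // first failure  -> 1
        failed.merge("srcAttempt_3", 1, (a, b) -> a + b); // repeat failure -> 2
        failed.merge("srcAttempt_7", 1, (a, b) -> a + b); // other attempt  -> 1
        // One event per distinct key is sent later, carrying its count,
        // e.g. {srcAttempt_3=2, srcAttempt_7=1} (iteration order not guaranteed).
        System.out.println(failed);
      }
    }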
- LOG.info(srcNameTrimmed + ": " + "Fetch failed for src: " + srcAttemptIdentifier - + "InputIdentifier: " + srcAttemptIdentifier + ", connectFailed: " - + connectFailed); + InputAttemptIdentifier srcAttemptIdentifier = inputAttemptFetchFailure.getInputAttemptIdentifier(); + if (isObsoleteInputAttemptIdentifier(srcAttemptIdentifier)) { + LOG.info("Do not report obsolete input: " + srcAttemptIdentifier); + return; + } + LOG.info( + "{}: Fetch failed for InputIdentifier: {}, connectFailed: {}, " + + "local fetch: {}, remote fetch failure reported as local failure: {})", + sourceDestNameTrimmed, srcAttemptIdentifier, connectFailed, + inputAttemptFetchFailure.isLocalFetch(), inputAttemptFetchFailure.isDiskErrorAtSource()); failedShufflesCounter.increment(1); inputContext.notifyProgress(); if (srcAttemptIdentifier == null) { reportFatalError(null, "Received fetchFailure for an unknown src (null)"); } else { - InputReadErrorEvent readError = InputReadErrorEvent.create( - "Fetch failure while fetching from " - + TezRuntimeUtils.getTaskAttemptIdentifier( - inputContext.getSourceVertexName(), - srcAttemptIdentifier.getInputIdentifier(), - srcAttemptIdentifier.getAttemptNumber()), - srcAttemptIdentifier.getInputIdentifier(), - srcAttemptIdentifier.getAttemptNumber()); - - List failedEvents = Lists.newArrayListWithCapacity(1); - failedEvents.add(readError); - inputContext.sendEvents(failedEvents); + InputReadErrorEvent readError = InputReadErrorEvent.create( + "Fetch failure while fetching from " + + TezRuntimeUtils.getTaskAttemptIdentifier( + inputContext.getSourceVertexName(), + srcAttemptIdentifier.getInputIdentifier(), + srcAttemptIdentifier.getAttemptNumber()), + srcAttemptIdentifier.getInputIdentifier(), + srcAttemptIdentifier.getAttemptNumber(), + inputAttemptFetchFailure.isLocalFetch(), + inputAttemptFetchFailure.isDiskErrorAtSource(), localhostName); + if (maxTimeToWaitForReportMillis > 0) { + try { + reportLock.lock(); + failedEvents.merge(readError, 1, (a, b) -> a + b); + reportCondition.signal(); + } finally { + reportLock.unlock(); + } + } else { + List events = Lists.newArrayListWithCapacity(1); + events.add(readError); + inputContext.sendEvents(events); + } } } + + private boolean isObsoleteInputAttemptIdentifier(InputAttemptIdentifier input) { + if (input == null) { + return false; + } + InputAttemptIdentifier obsoleteInput; + Iterator obsoleteInputsIter = obsoletedInputs.iterator(); + while (obsoleteInputsIter.hasNext()) { + obsoleteInput = obsoleteInputsIter.next(); + if (input.includes(obsoleteInput)) { + return true; + } + } + return false; + } + /////////////////// End of Methods from FetcherCallbackHandler public void shutdown() throws InterruptedException { if (Thread.currentThread().isInterrupted()) { //TODO: need to cleanup all FetchedInput (DiskFetchedInput, LocalDisFetchedInput), lockFile //As of now relying on job cleanup (when all directories would be cleared) - LOG.info(srcNameTrimmed + ": " + "Thread interrupted. Need to cleanup the local dirs"); + LOG.info(sourceDestNameTrimmed + ": " + "Thread interrupted. 
Need to cleanup the local dirs"); } if (!isShutdown.getAndSet(true)) { // Shut down any pending fetchers - LOG.info("Shutting down pending fetchers on source" + srcNameTrimmed + ": " + LOG.info("Shutting down pending fetchers on source" + sourceDestNameTrimmed + ": " + runningFetchers.size()); lock.lock(); try { @@ -839,26 +1036,16 @@ public void shutdown() throws InterruptedException { if (this.schedulerExecutor != null && !this.schedulerExecutor.isShutdown()) { this.schedulerExecutor.shutdownNow(); } + if (this.reporterExecutor != null + && !this.reporterExecutor.isShutdown()) { + this.reporterExecutor.shutdownNow(); + } if (this.fetcherExecutor != null && !this.fetcherExecutor.isShutdown()) { this.fetcherExecutor.shutdownNow(); // Interrupts all running fetchers. } } } - /////////////////// Methods for walking the available inputs - - /** - * @return true if there is another input ready for consumption. - */ - public boolean newInputAvailable() { - FetchedInput head = completedInputs.peek(); - if (head == null || head instanceof NullFetchedInput) { - return false; - } else { - return true; - } - } - /** * @return true if all of the required inputs have been fetched. */ @@ -877,21 +1064,21 @@ public boolean allInputsFetched() { * but more may become available. */ public FetchedInput getNextInput() throws InterruptedException { - FetchedInput input = null; - do { - // Check for no additional inputs - lock.lock(); - try { - input = completedInputs.peek(); - if (input == null && allInputsFetched()) { - break; - } - } finally { - lock.unlock(); + // Check for no additional inputs + lock.lock(); + try { + if (completedInputs.peek() == null && allInputsFetched()) { + return null; } - input = completedInputs.take(); // block - } while (input instanceof NullFetchedInput); - return input; + } finally { + lock.unlock(); + } + // Block until next input or End of Input message + FetchedInput fetchedInput = completedInputs.take(); + if (fetchedInput instanceof NullFetchedInput) { + fetchedInput = null; + } + return fetchedInput; } public int getNumInputs() { @@ -913,7 +1100,17 @@ public float getNumCompletedInputsFloat() { static class NullFetchedInput extends FetchedInput { public NullFetchedInput(InputAttemptIdentifier inputAttemptIdentifier) { - super(Type.MEMORY, -1, -1, inputAttemptIdentifier, null); + super(inputAttemptIdentifier, null); + } + + @Override + public Type getType() { + return Type.MEMORY; + } + + @Override + public long getSize() { + return -1; } @Override @@ -966,17 +1163,15 @@ private class SchedulerFutureCallback implements FutureCallback { @Override public void onSuccess(Void result) { - LOG.info(srcNameTrimmed + ": " + "Scheduler thread completed"); + LOG.info(sourceDestNameTrimmed + ": " + "Scheduler thread completed"); } @Override public void onFailure(Throwable t) { if (isShutdown.get()) { - if (LOG.isDebugEnabled()) { - LOG.debug(srcNameTrimmed + ": " + "Already shutdown. Ignoring error: " + t); - } + LOG.debug("{}: Already shutdown. Ignoring error.", sourceDestNameTrimmed, t); } else { - LOG.error(srcNameTrimmed + ": " + "Scheduler failed with error: ", t); + LOG.error(sourceDestNameTrimmed + ": " + "Scheduler failed with error: ", t); inputContext.reportFailure(TaskFailureType.NON_FATAL, t, "Shuffle Scheduler Failed"); } } @@ -1005,9 +1200,7 @@ private void doBookKeepingForFetcherComplete() { public void onSuccess(FetchResult result) { fetcher.shutdown(); if (isShutdown.get()) { - if (LOG.isDebugEnabled()) { - LOG.debug(srcNameTrimmed + ": " + "Already shutdown. 
Ignoring event from fetcher"); - } + LOG.debug("{}: Already shutdown. Ignoring event from fetcher", sourceDestNameTrimmed); } else { Iterable pendingInputs = result.getPendingInputs(); if (pendingInputs != null && pendingInputs.iterator().hasNext()) { @@ -1030,11 +1223,9 @@ public void onFailure(Throwable t) { // Unsuccessful - the fetcher may not have shutdown correctly. Try shutting it down. fetcher.shutdown(); if (isShutdown.get()) { - if (LOG.isDebugEnabled()) { - LOG.debug(srcNameTrimmed + ": " + "Already shutdown. Ignoring error from fetcher: " + t); - } + LOG.debug("{}: Already shutdown. Ignoring error from fetcher.", sourceDestNameTrimmed, t); } else { - LOG.error(srcNameTrimmed + ": " + "Fetcher failed with error: ", t); + LOG.error(sourceDestNameTrimmed + ": " + "Fetcher failed with error: ", t); shuffleError = t; inputContext.reportFailure(TaskFailureType.NON_FATAL, t, "Fetch failed"); doBookKeepingForFetcherComplete(); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java index f939cd1674..6072c039c1 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/impl/SimpleFetchedInputAllocator.java @@ -140,7 +140,7 @@ public synchronized FetchedInput allocate(long actualSize, long compressedSize, InputAttemptIdentifier inputAttemptIdentifier) throws IOException { if (actualSize > maxSingleShuffleLimit || this.usedMemory + actualSize > this.memoryLimit) { - return new DiskFetchedInput(actualSize, compressedSize, + return new DiskFetchedInput(compressedSize, inputAttemptIdentifier, this, conf, localDirAllocator, fileNameAllocator); } else { @@ -149,7 +149,7 @@ public synchronized FetchedInput allocate(long actualSize, long compressedSize, LOG.info(srcNameTrimmed + ": " + "Used memory after allocating " + actualSize + " : " + usedMemory); } - return new MemoryFetchedInput(actualSize, compressedSize, inputAttemptIdentifier, this); + return new MemoryFetchedInput(actualSize, inputAttemptIdentifier, this); } } @@ -160,7 +160,7 @@ public synchronized FetchedInput allocateType(Type type, long actualSize, switch (type) { case DISK: - return new DiskFetchedInput(actualSize, compressedSize, + return new DiskFetchedInput(compressedSize, inputAttemptIdentifier, this, conf, localDirAllocator, fileNameAllocator); default: @@ -197,7 +197,7 @@ private void cleanup(FetchedInput fetchedInput) { case DISK: break; case MEMORY: - unreserve(fetchedInput.getActualSize()); + unreserve(((MemoryFetchedInput) fetchedInput).getSize()); break; default: throw new TezUncheckedException("InputType: " + fetchedInput.getType() diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java index 2c3aac3520..a4fad0c416 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGrouped.java @@ -39,22 +39,27 @@ import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.tez.common.TezRuntimeFrameworkConfigs; +import org.apache.tez.common.TezUtilsInternal; import org.apache.tez.common.counters.TezCounter; import org.apache.tez.common.security.JobTokenSecretManager; +import org.apache.tez.runtime.api.InputContext; import org.apache.tez.runtime.library.common.Constants; import org.apache.tez.runtime.library.common.InputAttemptIdentifier; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput.Type; import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord; import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord; import org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException; +import org.apache.tez.runtime.library.common.shuffle.InputAttemptFetchFailure; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; +import org.apache.tez.runtime.library.common.shuffle.api.ShuffleHandlerError; import com.google.common.annotations.VisibleForTesting; class FetcherOrderedGrouped extends CallableWithNdc { - + private static final Logger LOG = LoggerFactory.getLogger(FetcherOrderedGrouped.class); private static final AtomicInteger nextId = new AtomicInteger(0); @@ -67,18 +72,18 @@ class FetcherOrderedGrouped extends CallableWithNdc { private final TezCounter ioErrs; private final TezCounter wrongLengthErrs; private final TezCounter badIdErrs; - private final TezCounter wrongMapErrs; private final TezCounter wrongReduceErrs; private final FetchedInputAllocatorOrderedGrouped allocator; private final ShuffleScheduler scheduler; private final ExceptionReporter exceptionReporter; private final int id; private final String logIdentifier; + private final RawLocalFileSystem localFs; private final String localShuffleHost; private final int localShufflePort; private final String applicationId; - private final int dagId; - private final MapHost mapHost; + private final int dagId; + protected final MapHost mapHost; private final int minPartition; private final int maxPartition; @@ -114,10 +119,10 @@ public FetcherOrderedGrouped(HttpConnectionParams httpConnectionParams, boolean ifileReadAhead, int ifileReadAheadLength, CompressionCodec codec, Configuration conf, + RawLocalFileSystem localFs, boolean localDiskFetchEnabled, String localHostname, int shufflePort, - String srcNameTrimmed, MapHost mapHost, TezCounter ioErrsCounter, TezCounter wrongLengthErrsCounter, @@ -125,12 +130,11 @@ public FetcherOrderedGrouped(HttpConnectionParams httpConnectionParams, TezCounter wrongMapErrsCounter, TezCounter connectionErrsCounter, TezCounter wrongReduceErrsCounter, - String applicationId, - int dagId, boolean asyncHttp, boolean sslShuffle, boolean verifyDiskChecksum, - boolean compositeFetch) { + boolean compositeFetch, + InputContext inputContext) { this.scheduler = scheduler; this.allocator = allocator; this.exceptionReporter = exceptionReporter; @@ -143,11 +147,10 @@ public FetcherOrderedGrouped(HttpConnectionParams httpConnectionParams, this.ioErrs = ioErrsCounter; this.wrongLengthErrs = wrongLengthErrsCounter; this.badIdErrs = badIdErrsCounter; - this.wrongMapErrs = wrongMapErrsCounter; this.connectionErrs = connectionErrsCounter; this.wrongReduceErrs = wrongReduceErrsCounter; - this.applicationId = applicationId; - this.dagId = dagId; + this.applicationId = inputContext.getApplicationId().toString(); + this.dagId = 
inputContext.getDagIdentifier(); this.ifileReadAhead = ifileReadAhead; this.ifileReadAheadLength = ifileReadAheadLength; @@ -159,6 +162,7 @@ public FetcherOrderedGrouped(HttpConnectionParams httpConnectionParams, this.codec = null; } this.conf = conf; + this.localFs = localFs; this.localShuffleHost = localHostname; this.localShufflePort = shufflePort; @@ -167,7 +171,9 @@ public FetcherOrderedGrouped(HttpConnectionParams httpConnectionParams, this.verifyDiskChecksum = verifyDiskChecksum; this.compositeFetch = compositeFetch; - this.logIdentifier = "fetcher [" + srcNameTrimmed + "] #" + id; + String sourceDestNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName()) + " -> " + + TezUtilsInternal.cleanVertexName(inputContext.getTaskVertexName()); + this.logIdentifier = "fetcher [" + sourceDestNameTrimmed + "] #" + id; } @VisibleForTesting @@ -204,9 +210,7 @@ public Void callInternal() { public void shutDown() { if (!stopped) { - if (LOG.isDebugEnabled()) { - LOG.debug("Fetcher stopped for host " + mapHost); - } + LOG.debug("Fetcher stopped for host {}", mapHost); stopped = true; // An interrupt will come in while shutting down the thread. cleanupCurrentConnection(false); @@ -236,8 +240,8 @@ private void cleanupCurrentConnection(boolean disconnect) { /** * The crux of the matter... - * - * @param host {@link MapHost} from which we need to + * + * @param host {@link MapHost} from which we need to * shuffle available map-outputs. */ @VisibleForTesting @@ -268,14 +272,15 @@ protected void copyFromHost(MapHost host) throws IOException { // Loop through available map-outputs and fetch them // On any error, faildTasks is not null and we exit - // after putting back the remaining maps to the + // after putting back the remaining maps to the // yet_to_be_fetched list and marking the failed tasks. - InputAttemptIdentifier[] failedTasks = null; + InputAttemptFetchFailure[] failedTasks = null; + while (!remaining.isEmpty() && failedTasks == null) { InputAttemptIdentifier inputAttemptIdentifier = remaining.entrySet().iterator().next().getValue(); // fail immediately after first failure because we dont know how much to - // skip for this error in the input stream. So we cannot move on to the + // skip for this error in the input stream. So we cannot move on to the // remaining outputs. YARN-1773. Will get to them in the next retry. 
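From here on, failures are reported as InputAttemptFetchFailure instead of a bare InputAttemptIdentifier, so the scheduler learns how a fetch failed, not only which attempt failed. A minimal sketch of such a wrapper, inferred from the factory methods this patch uses (fromAttempt, fromLocalFetchFailure, fromDiskErrorAtSource, withCause); the field set is an assumption and a String stands in for the attempt identifier:

    // Sketch of the failure-context wrapper pattern (not the Tez class itself).
    final class AttemptFetchFailure {
      private final String attempt;             // which input attempt failed
      private final boolean localFetch;         // failed reading a local disk copy
      private final boolean diskErrorAtSource;  // source host reported a disk error
      private Throwable cause;                  // optional root cause

      private AttemptFetchFailure(String attempt, boolean localFetch,
          boolean diskErrorAtSource) {
        this.attempt = attempt;
        this.localFetch = localFetch;
        this.diskErrorAtSource = diskErrorAtSource;
      }

      static AttemptFetchFailure fromAttempt(String a) {
        return new AttemptFetchFailure(a, false, false);
      }
      static AttemptFetchFailure fromLocalFetchFailure(String a) {
        return new AttemptFetchFailure(a, true, false);
      }
      static AttemptFetchFailure fromDiskErrorAtSource(String a) {
        return new AttemptFetchFailure(a, false, true);
      }
      AttemptFetchFailure withCause(Throwable t) {
        this.cause = t;
        return this;
      }
      Throwable getCause() {
        return cause;
      }
    }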
try { failedTasks = copyMapOutput(host, input, inputAttemptIdentifier); @@ -283,40 +288,24 @@ protected void copyFromHost(MapHost host) throws IOException { // Setup connection again if disconnected cleanupCurrentConnection(true); if (stopped) { - if (LOG.isDebugEnabled()) { - LOG.debug("Not re-establishing connection since Fetcher has been stopped"); - } + LOG.debug("Not re-establishing connection since Fetcher has been stopped"); return; } // Connect with retry if (!setupConnection(host, remaining.values())) { if (stopped) { cleanupCurrentConnection(true); - if (LOG.isDebugEnabled()) { - LOG.debug( - "Not reporting connection re-establishment failure since fetcher is stopped"); - } + LOG.debug("Not reporting connection re-establishment failure since fetcher is stopped"); return; } - failedTasks = new InputAttemptIdentifier[] {getNextRemainingAttempt()}; + failedTasks = new InputAttemptFetchFailure[] { + new InputAttemptFetchFailure(getNextRemainingAttempt()) }; break; } } } - if (failedTasks != null && failedTasks.length > 0) { - if (stopped) { - if (LOG.isDebugEnabled()) { - LOG.debug("Ignoring copyMapOutput failures for tasks: " + Arrays.toString(failedTasks) + - " since Fetcher has been stopped"); - } - } else { - LOG.warn("copyMapOutput failed for tasks " + Arrays.toString(failedTasks)); - for (InputAttemptIdentifier left : failedTasks) { - scheduler.copyFailed(left, host, true, false, false); - } - } - } + invokeCopyFailedForFailedTasks(host, failedTasks); cleanupCurrentConnection(false); @@ -330,12 +319,30 @@ protected void copyFromHost(MapHost host) throws IOException { } } + private void invokeCopyFailedForFailedTasks(MapHost host, + InputAttemptFetchFailure[] failedTasks) { + if (failedTasks != null && failedTasks.length > 0) { + if (stopped) { + if (LOG.isDebugEnabled()) { + LOG.debug("Ignoring copyMapOutput failures for tasks: " + Arrays.toString(failedTasks) + + " since Fetcher has been stopped"); + } + } else { + LOG.warn("copyMapOutput failed for tasks " + Arrays.toString(failedTasks)); + for (InputAttemptFetchFailure left : failedTasks) { + scheduler.copyFailed(left, host, true, false); + } + } + } + } + @VisibleForTesting boolean setupConnection(MapHost host, Collection attempts) throws IOException { boolean connectSucceeded = false; + StringBuilder baseURI = null; try { - StringBuilder baseURI = ShuffleUtils.constructBaseURIForShuffleHandler(host.getHost(), + baseURI = ShuffleUtils.constructBaseURIForShuffleHandler(host.getHost(), host.getPort(), host.getPartitionId(), host.getPartitionCount(), applicationId, dagId, sslShuffle); URL url = ShuffleUtils.constructInputURL(baseURI.toString(), attempts, httpConnectionParams.isKeepAlive()); httpConnection = ShuffleUtils.getHttpConnection(asyncHttp, url, httpConnectionParams, @@ -343,31 +350,28 @@ boolean setupConnection(MapHost host, Collection attempt connectSucceeded = httpConnection.connect(); if (stopped) { - if (LOG.isDebugEnabled()) { - LOG.debug("Detected fetcher has been shutdown after connection establishment. Returning"); - } + LOG.debug("Detected fetcher has been shutdown after connection establishment. 
Returning"); return false; } - input = httpConnection.getInputStream(); - httpConnection.validate(); + setupConnectionInternal(host, attempts); return true; } catch (IOException | InterruptedException ie) { if (ie instanceof InterruptedException) { Thread.currentThread().interrupt(); //reset status } if (stopped) { - if (LOG.isDebugEnabled()) { - LOG.debug("Not reporting fetch failure, since an Exception was caught after shutdown"); - } + LOG.debug("Not reporting fetch failure, since an Exception was caught after shutdown"); return false; } ioErrs.increment(1); if (!connectSucceeded) { - LOG.warn("Failed to connect to " + host + " with " + remaining.size() + " inputs", ie); + LOG.warn("FETCH_FAILURE: Failed to connect from {} to {} with {} inputs, url: {}", localShuffleHost, + host, remaining.size(), baseURI, ie); connectionErrs.increment(1); } else { - LOG.warn("Failed to verify reply after connecting to " + host + " with " + remaining.size() - + " inputs pending", ie); + LOG.warn( + "FETCH_FAILURE: Failed to verify reply after connecting from {} to {} with {} inputs pending, url: {}", + localShuffleHost, host, remaining.size(), baseURI, ie); } // At this point, either the connection failed, or the initial header verification failed. @@ -376,12 +380,19 @@ boolean setupConnection(MapHost host, Collection attempt for (InputAttemptIdentifier left : remaining.values()) { // Need to be handling temporary glitches .. // Report read error to the AM to trigger source failure heuristics - scheduler.copyFailed(left, host, connectSucceeded, !connectSucceeded, false); + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(left).withCause(ie), host, connectSucceeded, + !connectSucceeded); } return false; } } + protected void setupConnectionInternal(MapHost host, Collection attempts) + throws IOException, InterruptedException { + input = httpConnection.getInputStream(); + httpConnection.validate(); + } + @VisibleForTesting protected void putBackRemainingMapOutputs(MapHost host) { // Cycle through remaining MapOutputs @@ -400,7 +411,8 @@ protected void putBackRemainingMapOutputs(MapHost host) { } } - private static InputAttemptIdentifier[] EMPTY_ATTEMPT_ID_ARRAY = new InputAttemptIdentifier[0]; + private static final InputAttemptFetchFailure[] EMPTY_ATTEMPT_ID_ARRAY = + new InputAttemptFetchFailure[0]; private static class MapOutputStat { final InputAttemptIdentifier srcAttemptId; @@ -421,8 +433,8 @@ public String toString() { } } - protected InputAttemptIdentifier[] copyMapOutput(MapHost host, - DataInputStream input, InputAttemptIdentifier inputAttemptIdentifier) throws FetcherReadTimeoutException { + protected InputAttemptFetchFailure[] copyMapOutput(MapHost host, DataInputStream input, + InputAttemptIdentifier inputAttemptIdentifier) throws FetcherReadTimeoutException, IOException { MapOutput mapOutput = null; InputAttemptIdentifier srcAttemptId = null; long decompressedLength = 0; @@ -448,11 +460,15 @@ protected InputAttemptIdentifier[] copyMapOutput(MapHost host, badIdErrs.increment(1); LOG.warn("Invalid map id: " + header.mapId + ", expected to start with " + InputAttemptIdentifier.PATH_PREFIX + ", partition: " + header.forReduce); - return new InputAttemptIdentifier[]{getNextRemainingAttempt()}; - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Already shutdown. 
Ignoring invalid map id error"); + if (header.mapId.startsWith(ShuffleHandlerError.DISK_ERROR_EXCEPTION.toString())) { + //this should be treated as local fetch failure while reporting later + return new InputAttemptFetchFailure[] { + InputAttemptFetchFailure.fromDiskErrorAtSource(getNextRemainingAttempt()) }; } + return new InputAttemptFetchFailure[] { + InputAttemptFetchFailure.fromAttempt(getNextRemainingAttempt()) }; + } else { + LOG.debug("Already shutdown. Ignoring invalid map id error"); return EMPTY_ATTEMPT_ID_ARRAY; } } @@ -471,9 +487,10 @@ protected InputAttemptIdentifier[] copyMapOutput(MapHost host, if (!stopped) { badIdErrs.increment(1); LOG.warn("Invalid map id ", e); - // Don't know which one was bad, so consider this one bad and dont read - // the remaining because we dont know where to start reading from. YARN-1773 - return new InputAttemptIdentifier[]{getNextRemainingAttempt()}; + // Don't know which one was bad, so consider this one bad and don't read + // the remaining because we don't know where to start reading from. YARN-1773 + return new InputAttemptFetchFailure[] { + new InputAttemptFetchFailure(getNextRemainingAttempt()) }; } else { if (LOG.isDebugEnabled()) { LOG.debug("Already shutdown. Ignoring invalid map id error. Exception: " + @@ -493,11 +510,10 @@ protected InputAttemptIdentifier[] copyMapOutput(MapHost host, LOG.warn("Was expecting " + srcAttemptId + " but got null"); } assert (srcAttemptId != null); - return new InputAttemptIdentifier[]{srcAttemptId}; + return new InputAttemptFetchFailure[] { + new InputAttemptFetchFailure(getNextRemainingAttempt()) }; } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Already stopped. Ignoring verification failure."); - } + LOG.debug("Already stopped. Ignoring verification failure."); return EMPTY_ATTEMPT_ID_ARRAY; } } @@ -521,9 +537,7 @@ protected InputAttemptIdentifier[] copyMapOutput(MapHost host, ioErrs.increment(1); scheduler.reportLocalError(e); } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Already stopped. Ignoring error from merger.reserve"); - } + LOG.debug("Already stopped. Ignoring error from merger.reserve"); } return EMPTY_ATTEMPT_ID_ARRAY; } @@ -565,7 +579,7 @@ protected InputAttemptIdentifier[] copyMapOutput(MapHost host, endTime - startTime, mapOutput, false); } remaining.remove(inputAttemptIdentifier.toString()); - } catch(IOException ioe) { + } catch(IOException | InternalError ioe) { if (stopped) { if (LOG.isDebugEnabled()) { LOG.debug("Not reporting fetch failure for exception during data copy: [" @@ -591,9 +605,10 @@ protected InputAttemptIdentifier[] copyMapOutput(MapHost host, srcAttemptId + " decomp: " + decompressedLength + ", " + compressedLength, ioe); if (srcAttemptId == null) { - return remaining.values().toArray(new InputAttemptIdentifier[remaining.values().size()]); + return InputAttemptFetchFailure.fromAttempts(remaining.values()); } else { - return new InputAttemptIdentifier[]{srcAttemptId}; + return new InputAttemptFetchFailure[] { + new InputAttemptFetchFailure(srcAttemptId) }; } } LOG.warn("Failed to shuffle output of " + srcAttemptId + @@ -601,7 +616,8 @@ protected InputAttemptIdentifier[] copyMapOutput(MapHost host, // Inform the shuffle-scheduler mapOutput.abort(); - return new InputAttemptIdentifier[] {srcAttemptId}; + return new InputAttemptFetchFailure[] { + new InputAttemptFetchFailure(srcAttemptId) }; } return null; } @@ -614,7 +630,7 @@ protected InputAttemptIdentifier[] copyMapOutput(MapHost host, * @return true to indicate connection retry. false otherwise. 
* @throws IOException */ - private boolean shouldRetry(MapHost host, IOException ioe) { + private boolean shouldRetry(MapHost host, Throwable ioe) { if (!(ioe instanceof SocketTimeoutException)) { return false; } @@ -624,7 +640,7 @@ private boolean shouldRetry(MapHost host, IOException ioe) { retryStartTime = currentTime; } - if (currentTime - retryStartTime < httpConnectionParams.getReadTimeout()) { + if ((currentTime - retryStartTime) - httpConnectionParams.getReadTimeout() < 0) { LOG.warn("Shuffle output from " + host.getHostIdentifier() + " failed, retry it."); //retry connecting to the host @@ -636,7 +652,7 @@ private boolean shouldRetry(MapHost host, IOException ioe) { return false; } } - + /** * Do some basic verification on the input received -- Being defensive * @param compressedLength @@ -651,7 +667,7 @@ private boolean verifySanity(long compressedLength, long decompressedLength, if (compressedLength < 0 || decompressedLength < 0) { wrongLengthErrs.increment(1); LOG.warn(logIdentifier + " invalid lengths in map output header: id: " + - srcAttemptId + " len: " + compressedLength + ", decomp len: " + + srcAttemptId + " len: " + compressedLength + ", decomp len: " + decompressedLength); return false; } @@ -667,7 +683,7 @@ private boolean verifySanity(long compressedLength, long decompressedLength, } return true; } - + private InputAttemptIdentifier getNextRemainingAttempt() { if (remaining.size() > 0) { return remaining.values().iterator().next(); @@ -715,26 +731,27 @@ protected void setupLocalDiskFetch(MapHost host) throws InterruptedException { srcAttemptId = scheduler.getIdentifierForFetchedOutput(srcAttemptId.getPathComponent(), reduceId); Path filename = getShuffleInputFileName(srcAttemptId.getPathComponent(), null); TezIndexRecord indexRecord = getIndexRecord(srcAttemptId.getPathComponent(), reduceId); + if(!indexRecord.hasData()) { + continue; + } mapOutput = getMapOutputForDirectDiskFetch(srcAttemptId, filename, indexRecord); long endTime = System.currentTimeMillis(); scheduler.copySucceeded(srcAttemptId, host, indexRecord.getPartLength(), indexRecord.getRawLength(), (endTime - startTime), mapOutput, true); - } catch (IOException e) { + } catch (IOException | InternalError e) { if (mapOutput != null) { mapOutput.abort(); } if (!stopped) { hasFailures = true; ioErrs.increment(1); - scheduler.copyFailed(srcAttemptId, host, true, false, true); + scheduler.copyFailed(InputAttemptFetchFailure.fromLocalFetchFailure(srcAttemptId).withCause(e), + host, true, false); LOG.warn("Failed to read local disk output of " + srcAttemptId + " from " + host.getHostIdentifier(), e); } else { - if (LOG.isDebugEnabled()) { - LOG.debug( - "Ignoring fetch error during local disk copy since fetcher has already been stopped"); - } + LOG.debug("Ignoring fetch error during local disk copy since fetcher has already been stopped"); return; } @@ -769,7 +786,7 @@ protected TezIndexRecord getIndexRecord(String pathComponent, int partitionId) throws IOException { Path indexFile = getShuffleInputFileName(pathComponent, Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING); - TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf); + TezSpillRecord spillRecord = new TezSpillRecord(indexFile, localFs); return spillRecord.getIndex(partitionId); } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGroupedWithInjectableErrors.java 
b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGroupedWithInjectableErrors.java new file mode 100644 index 0000000000..9c782f6585 --- /dev/null +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/FetcherOrderedGroupedWithInjectableErrors.java @@ -0,0 +1,68 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.runtime.library.common.shuffle.orderedgrouped; + +import java.io.IOException; +import java.util.Collection; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.tez.common.TezUtilsInternal; +import org.apache.tez.common.counters.TezCounter; +import org.apache.tez.common.security.JobTokenSecretManager; +import org.apache.tez.http.HttpConnectionParams; +import org.apache.tez.runtime.api.InputContext; +import org.apache.tez.runtime.library.common.InputAttemptIdentifier; +import org.apache.tez.runtime.library.common.shuffle.FetcherErrorTestingConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class FetcherOrderedGroupedWithInjectableErrors extends FetcherOrderedGrouped { + private static final Logger LOG = LoggerFactory.getLogger(FetcherOrderedGroupedWithInjectableErrors.class); + + private FetcherErrorTestingConfig fetcherErrorTestingConfig; + private String srcNameTrimmed; + + public FetcherOrderedGroupedWithInjectableErrors(HttpConnectionParams httpConnectionParams, + ShuffleScheduler scheduler, FetchedInputAllocatorOrderedGrouped allocator, ExceptionReporter exceptionReporter, + JobTokenSecretManager jobTokenSecretMgr, boolean ifileReadAhead, int ifileReadAheadLength, CompressionCodec codec, + Configuration conf, RawLocalFileSystem localFs, boolean localDiskFetchEnabled, String localHostname, + int shufflePort, MapHost mapHost, TezCounter ioErrsCounter, + TezCounter wrongLengthErrsCounter, TezCounter badIdErrsCounter, TezCounter wrongMapErrsCounter, + TezCounter connectionErrsCounter, TezCounter wrongReduceErrsCounter, boolean asyncHttp, + boolean sslShuffle, boolean verifyDiskChecksum, boolean compositeFetch, InputContext inputContext) { + super(httpConnectionParams, scheduler, allocator, exceptionReporter, jobTokenSecretMgr, ifileReadAhead, + ifileReadAheadLength, codec, conf, localFs, localDiskFetchEnabled, localHostname, shufflePort, + mapHost, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, + wrongReduceErrsCounter, asyncHttp, sslShuffle, verifyDiskChecksum, compositeFetch, inputContext); + this.fetcherErrorTestingConfig = new FetcherErrorTestingConfig(conf, inputContext.getObjectRegistry()); + this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName()); + LOG.info("Initialized FetcherOrderedGroupedWithInjectableErrors with config: {}", fetcherErrorTestingConfig); + } + + @Override + protected void 
setupConnectionInternal(MapHost host, Collection attempts) + throws IOException, InterruptedException { + LOG.info("Checking if fetcher should fail for host: {} ...", mapHost.getHost()); + for (InputAttemptIdentifier inputAttemptIdentifier : attempts) { + if (fetcherErrorTestingConfig.shouldFail(mapHost.getHost(), srcNameTrimmed, inputAttemptIdentifier)) { + throw new IOException(String.format( + "FetcherOrderedGroupedWithInjectableErrors tester made failure for host: %s, input attempt: %s", + mapHost.getHost(), inputAttemptIdentifier.getAttemptNumber())); + } + } + super.setupConnectionInternal(host, attempts); + } +} diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/InMemoryWriter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/InMemoryWriter.java index 46dc72ed5d..c5db7c9ade 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/InMemoryWriter.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/InMemoryWriter.java @@ -48,9 +48,12 @@ public InMemoryWriter(byte[] array) { } public InMemoryWriter(BoundedByteArrayOutputStream arrayStream) { - super(null, null); - this.out = - new NonSyncDataOutputStream(new IFileOutputStream(arrayStream)); + this(arrayStream, false); + } + + public InMemoryWriter(BoundedByteArrayOutputStream arrayStream, boolean rle) { + super(null, null, rle); + this.out = new NonSyncDataOutputStream(new IFileOutputStream(arrayStream)); } public void append(Object key, Object value) throws IOException { diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java index 6ffdb5657b..eb13c03549 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeManager.java @@ -18,7 +18,7 @@ package org.apache.tez.runtime.library.common.shuffle.orderedgrouped; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.apache.commons.io.FilenameUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -30,7 +30,6 @@ import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.BoundedByteArrayOutputStream; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.FileChunk; import org.apache.hadoop.io.RawComparator; @@ -47,6 +46,7 @@ import org.apache.tez.runtime.library.common.Constants; import org.apache.tez.runtime.library.common.InputAttemptIdentifier; import org.apache.tez.runtime.library.common.combine.Combiner; +import org.apache.tez.runtime.library.common.serializer.SerializationContext; import org.apache.tez.runtime.library.common.sort.impl.IFile; import org.apache.tez.runtime.library.common.sort.impl.IFile.Writer; import org.apache.tez.runtime.library.common.sort.impl.TezMerger; @@ -78,14 +78,14 @@ @InterfaceStability.Unstable @SuppressWarnings(value={"rawtypes"}) public class MergeManager implements FetchedInputAllocatorOrderedGrouped 
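FetcherOrderedGroupedWithInjectableErrors above injects test failures solely by overriding the protected connection-setup hook and delegating to super on the non-failing path. A minimal sketch of the same pattern with hypothetical names (Client and FlakyClient are illustrative, not Tez classes):

import java.io.IOException;
import java.util.function.Predicate;

class Client {
    protected void connect(String host) throws IOException {
        // real connection logic would live here
    }
}

class FlakyClient extends Client {
    // Plays the role of FetcherErrorTestingConfig.shouldFail(...)
    private final Predicate<String> shouldFail;

    FlakyClient(Predicate<String> shouldFail) {
        this.shouldFail = shouldFail;
    }

    @Override
    protected void connect(String host) throws IOException {
        if (shouldFail.test(host)) {
            throw new IOException("Injected failure for host: " + host);
        }
        super.connect(host); // normal path is untouched
    }
}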
{ - + private static final Logger LOG = LoggerFactory.getLogger(MergeManager.class); private final Configuration conf; private final FileSystem localFS; private final FileSystem rfs; private final LocalDirAllocator localDirAllocator; - + private final TezTaskOutputFiles mapOutputFile; private final Progressable progressable = new Progressable() { @Override @@ -93,8 +93,8 @@ public void progress() { inputContext.notifyProgress(); } }; - private final Combiner combiner; - + private final Combiner combiner; + @VisibleForTesting final Set inMemoryMergedMapOutputs = new TreeSet(new MapOutput.MapOutputComparator()); @@ -109,7 +109,7 @@ public void progress() { final Set onDiskMapOutputs = new TreeSet(); @VisibleForTesting final OnDiskMerger onDiskMerger; - + private final long memoryLimit; @VisibleForTesting final long postMergeMemLimit; @@ -120,13 +120,13 @@ public void progress() { private final AtomicBoolean isShutdown = new AtomicBoolean(false); - private final int memToMemMergeOutputsThreshold; + private final int memToMemMergeOutputsThreshold; private final long mergeThreshold; - + private final long initialMemoryAvailable; private final ExceptionReporter exceptionReporter; - + private final InputContext inputContext; private final TezCounter spilledRecordsCounter; @@ -134,16 +134,16 @@ public void progress() { private final TezCounter reduceCombineInputCounter; private final TezCounter mergedMapOutputsCounter; - + private final TezCounter numMemToDiskMerges; private final TezCounter numDiskToDiskMerges; private final TezCounter additionalBytesWritten; private final TezCounter additionalBytesRead; - + private final CompressionCodec codec; - + private volatile boolean finalMergeComplete = false; - + private final boolean ifileReadAhead; private final int ifileReadAheadLength; private final int ifileBufferSize; @@ -158,12 +158,14 @@ public void progress() { private final boolean cleanup; + private SerializationContext serializationContext; + /** * Construct the MergeManager. Must call start before it becomes usable. 
*/ public MergeManager(Configuration conf, FileSystem localFS, - LocalDirAllocator localDirAllocator, + LocalDirAllocator localDirAllocator, InputContext inputContext, Combiner combiner, TezCounter spilledRecordsCounter, @@ -179,7 +181,7 @@ public MergeManager(Configuration conf, this.localDirAllocator = localDirAllocator; this.exceptionReporter = exceptionReporter; this.initialMemoryAvailable = initialMemoryAvailable; - + this.combiner = combiner; this.reduceCombineInputCounter = reduceCombineInputCounter; @@ -191,7 +193,7 @@ public MergeManager(Configuration conf, this.localFS = localFS; this.rfs = ((LocalFileSystem)localFS).getRaw(); - + this.numDiskToDiskMerges = inputContext.getCounters().findCounter(TaskCounter.NUM_DISK_TO_DISK_MERGES); this.numMemToDiskMerges = inputContext.getCounters().findCounter(TaskCounter.NUM_MEM_TO_DISK_MERGES); this.additionalBytesWritten = inputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN); @@ -209,11 +211,11 @@ public MergeManager(Configuration conf, } this.ifileBufferSize = conf.getInt("io.file.buffer.size", TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT); - + // Figure out initial memory req start final float maxInMemCopyUse = conf.getFloat( - TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT); if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) { throw new IllegalArgumentException("Invalid value for " + @@ -233,13 +235,13 @@ public MergeManager(Configuration conf, long maxRedBuffer = (long) (inputContext.getTotalMemoryAvailableToTask() * maxRedPer); // Figure out initial memory req end - + if (this.initialMemoryAvailable < memLimit) { this.memoryLimit = this.initialMemoryAvailable; } else { this.memoryLimit = memLimit; } - + if (this.initialMemoryAvailable < maxRedBuffer) { this.postMergeMemLimit = this.initialMemoryAvailable; } else { @@ -248,18 +250,18 @@ public MergeManager(Configuration conf, if (LOG.isDebugEnabled()) { LOG.debug( - inputContext.getSourceVertexName() + ": " + "InitialRequest: ShuffleMem=" + memLimit + + inputContext.getInputOutputVertexNames() + ": " + "InitialRequest: ShuffleMem=" + memLimit + ", postMergeMem=" + maxRedBuffer + ", RuntimeTotalAvailable=" + this.initialMemoryAvailable + ". 
Updated to: ShuffleMem=" + this.memoryLimit + ", postMergeMem: " + this.postMergeMemLimit); } - this.ioSortFactor = + this.ioSortFactor = conf.getInt( - TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, + TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT); - + final float singleShuffleMemoryLimitPercent = conf.getFloat( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, @@ -272,46 +274,56 @@ public MergeManager(Configuration conf, } //TODO: Cap it to MAX_VALUE until MapOutput starts supporting > 2 GB - this.maxSingleShuffleLimit = + this.maxSingleShuffleLimit = (long) Math.min((memoryLimit * singleShuffleMemoryLimitPercent), Integer.MAX_VALUE); - this.memToMemMergeOutputsThreshold = + this.memToMemMergeOutputsThreshold = conf.getInt( - TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, ioSortFactor); - this.mergeThreshold = - (long)(this.memoryLimit * + this.mergeThreshold = + (long)(this.memoryLimit * conf.getFloat( - TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT_DEFAULT)); - LOG.info(inputContext.getSourceVertexName() + ": MergerManager: memoryLimit=" + memoryLimit + ", " + + LOG.info(inputContext.getInputOutputVertexNames() + ": MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit=" + maxSingleShuffleLimit + ", " + - "mergeThreshold=" + mergeThreshold + ", " + + "mergeThreshold=" + mergeThreshold + ", " + "ioSortFactor=" + ioSortFactor + ", " + "postMergeMem=" + postMergeMemLimit + ", " + "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold); - + if (this.maxSingleShuffleLimit >= this.mergeThreshold) { - throw new RuntimeException("Invlaid configuration: " + throw new RuntimeException("Invalid configuration: " + "maxSingleShuffleLimit should be less than mergeThreshold" + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit + ", mergeThreshold: " + this.mergeThreshold); } - - boolean allowMemToMemMerge = - conf.getBoolean( - TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, + + boolean allowMemToMemMerge = + conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM_DEFAULT); - if (allowMemToMemMerge) { - this.memToMemMerger = - new IntermediateMemoryToMemoryMerger(this, - memToMemMergeOutputsThreshold); - } else { - this.memToMemMerger = null; - } - - this.inMemoryMerger = new InMemoryMerger(this); - - this.onDiskMerger = new OnDiskMerger(this); + if (allowMemToMemMerge) { + this.memToMemMerger = + new IntermediateMemoryToMemoryMerger(this, memToMemMergeOutputsThreshold); + } else { + this.memToMemMerger = null; + } + + this.inMemoryMerger = new InMemoryMerger(this); + + this.onDiskMerger = new OnDiskMerger(this); + + this.serializationContext = new SerializationContext(conf); + } + + void setupParentThread(Thread shuffleSchedulerThread) { + LOG.info("Setting merger's parent thread to " + + shuffleSchedulerThread.getName()); + if (this.memToMemMerger != null) { + memToMemMerger.setParentThread(shuffleSchedulerThread); + } + this.inMemoryMerger.setParentThread(shuffleSchedulerThread); + this.onDiskMerger.setParentThread(shuffleSchedulerThread); } @Private @@ -330,7 +342,7 @@ void configureAndStart() { static long getInitialMemoryRequirement(Configuration conf, long 
maxAvailableTaskMemory) { final float maxInMemCopyUse = conf.getFloat( - TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT_DEFAULT); if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) { throw new IllegalArgumentException("Invalid value for " + @@ -341,7 +353,7 @@ static long getInitialMemoryRequirement(Configuration conf, long maxAvailableTas // Allow unit tests to fix Runtime memory long memLimit = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, (long)(maxAvailableTaskMemory * maxInMemCopyUse)); - + float maxRedPer = conf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_BUFFER_PERCENT_DEFAULT); if (maxRedPer > 1.0 || maxRedPer < 0.0) { @@ -376,9 +388,7 @@ public void waitForInMemoryMerge() throws InterruptedException { } if (triggerAdditionalMerge) { inMemoryMerger.waitForMerge(); - if (LOG.isDebugEnabled()) { - LOG.debug("Additional in-memory merge triggered"); - } + LOG.debug("Additional in-memory merge triggered"); } } @@ -400,7 +410,7 @@ public synchronized void waitForShuffleToMergeMemory() throws InterruptedExcepti final private MapOutput stallShuffle = MapOutput.createWaitMapOutput(null); @Override - public synchronized MapOutput reserve(InputAttemptIdentifier srcAttemptIdentifier, + public synchronized MapOutput reserve(InputAttemptIdentifier srcAttemptIdentifier, long requestedSize, long compressedLength, int fetcher @@ -414,7 +424,7 @@ public synchronized MapOutput reserve(InputAttemptIdentifier srcAttemptIdentifie return MapOutput.createDiskMapOutput(srcAttemptIdentifier, this, compressedLength, conf, fetcher, true, mapOutputFile); } - + // Stall shuffle if we are above the memory limit // It is possible that all threads could just be stalling and not make @@ -429,7 +439,7 @@ public synchronized MapOutput reserve(InputAttemptIdentifier srcAttemptIdentifie // (usedMemory + requestedSize > memoryLimit). When this thread is done // fetching, this will automatically trigger a merge thereby unlocking // all the stalled threads - + if (usedMemory > memoryLimit) { if (LOG.isDebugEnabled()) { LOG.debug(srcAttemptIdentifier + ": Stalling shuffle since usedMemory (" + usedMemory @@ -438,7 +448,7 @@ public synchronized MapOutput reserve(InputAttemptIdentifier srcAttemptIdentifie } return stallShuffle; } - + // Allow the in-memory shuffle to progress if (LOG.isDebugEnabled()) { LOG.debug(srcAttemptIdentifier + ": Proceeding with shuffle since usedMemory (" @@ -447,7 +457,7 @@ public synchronized MapOutput reserve(InputAttemptIdentifier srcAttemptIdentifie } return unconditionalReserve(srcAttemptIdentifier, requestedSize, true); } - + /** * Unconditional Reserve is used by the Memory-to-Memory thread */ @@ -477,7 +487,7 @@ public synchronized void releaseCommittedMemory(long size) { @Override - public synchronized void closeInMemoryFile(MapOutput mapOutput) { + public synchronized void closeInMemoryFile(MapOutput mapOutput) { inMemoryMapOutputs.add(mapOutput); trackAndLogCloseInMemoryFile(mapOutput); @@ -490,7 +500,7 @@ public synchronized void closeInMemoryFile(MapOutput mapOutput) { // This should likely run a Combiner. 
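The reserve() logic above admits a fetched map output into memory through two gates: an output larger than maxSingleShuffleLimit goes straight to disk, and anything arriving while usedMemory exceeds memoryLimit gets the stallShuffle marker until a completed fetch triggers a merge and frees memory. A condensed sketch of that decision (hypothetical class, not the Tez API):

// Simplified reservation gate mirroring MergeManager.reserve().
final class MemoryGate {
    private final long memoryLimit;
    private final long maxSingleShuffleLimit;
    private long usedMemory;

    MemoryGate(long memoryLimit, long maxSingleShuffleLimit) {
        this.memoryLimit = memoryLimit;
        this.maxSingleShuffleLimit = maxSingleShuffleLimit;
    }

    enum Decision { DISK, STALL, MEMORY }

    synchronized Decision reserve(long requestedSize) {
        if (requestedSize > maxSingleShuffleLimit) {
            return Decision.DISK;   // too big to ever hold in memory
        }
        if (usedMemory > memoryLimit) {
            return Decision.STALL;  // a finishing fetch will trigger a merge and unblock
        }
        usedMemory += requestedSize;
        return Decision.MEMORY;
    }

    synchronized void release(long size) {
        usedMemory -= size;
    }
}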
if (memToMemMerger != null) { synchronized (memToMemMerger) { - if (!memToMemMerger.isInProgress() && + if (!memToMemMerger.isInProgress() && inMemoryMapOutputs.size() >= memToMemMergeOutputsThreshold) { memToMemMerger.startMerge(inMemoryMapOutputs); } @@ -539,8 +549,9 @@ private void trackAndLogCloseInMemoryFile(MapOutput mapOutput) { private void startMemToDiskMerge() { synchronized (inMemoryMerger) { if (!inMemoryMerger.isInProgress()) { - LOG.info(inputContext.getSourceVertexName() + ": " + "Starting inMemoryMerger's merge since commitMemory=" + - commitMemory + " > mergeThreshold=" + mergeThreshold + + LOG.info(inputContext.getInputOutputVertexNames() + ": " + + "Starting inMemoryMerger's merge since commitMemory=" + commitMemory + + " > mergeThreshold=" + mergeThreshold + ". Current usedMemory=" + usedMemory); inMemoryMapOutputs.addAll(inMemoryMergedMapOutputs); inMemoryMergedMapOutputs.clear(); @@ -548,7 +559,7 @@ private void startMemToDiskMerge() { } } } - + public synchronized void closeInMemoryMergedFile(MapOutput mapOutput) { inMemoryMergedMapOutputs.add(mapOutput); if (LOG.isDebugEnabled()) { @@ -573,7 +584,7 @@ public FileSystem getLocalFileSystem() { @Override public synchronized void closeOnDiskFile(FileChunk file) { - //including only path & offset for valdiations. + //including only path & offset for validations. for (FileChunk fileChunk : onDiskMapOutputs) { if (fileChunk.getPath().equals(file.getPath())) { //ensure offsets are not the same. @@ -616,7 +627,7 @@ private void logCloseOnDiskFile(FileChunk file) { * Should only be used after the Shuffle phase is complete, otherwise can * return an invalid state since a merge may not be in progress due to * inadequate inputs - * + * @return true if the merge process is complete, otherwise false */ @Private @@ -688,9 +699,7 @@ static void cleanup(FileSystem fs, Path path) { } try { - if (LOG.isDebugEnabled()) { - LOG.debug("Deleting " + path); - } + LOG.debug("Deleting {}", path); fs.delete(path, true); } catch (IOException e) { LOG.info("Error in deleting " + path); @@ -707,14 +716,15 @@ void runCombineProcessor(TezRawKeyValueIterator kvIter, Writer writer) /** * Merges multiple in-memory segments to another in-memory segment */ - private class IntermediateMemoryToMemoryMerger + private class IntermediateMemoryToMemoryMerger extends MergeThread { - - public IntermediateMemoryToMemoryMerger(MergeManager manager, + + public IntermediateMemoryToMemoryMerger(MergeManager manager, int mergeFactor) { super(manager, mergeFactor, exceptionReporter); setName("MemToMemMerger [" + TezUtilsInternal - .cleanVertexName(inputContext.getSourceVertexName()) + "]"); + .cleanVertexName(inputContext.getSourceVertexName()) + + "_" + inputContext.getUniqueIdentifier() + "]"); setDaemon(true); } @@ -726,7 +736,7 @@ public void merge(List inputs) throws IOException, InterruptedExcepti inputContext.notifyProgress(); - InputAttemptIdentifier dummyMapId = inputs.get(0).getAttemptIdentifier(); + InputAttemptIdentifier dummyMapId = inputs.get(0).getAttemptIdentifier(); List inMemorySegments = new ArrayList(); MapOutput mergedMapOutputs = null; @@ -779,8 +789,8 @@ public void merge(List inputs) throws IOException, InterruptedExcepti Writer writer = new InMemoryWriter(mergedMapOutputs.getMemory()); - LOG.info(inputContext.getSourceVertexName() + ": " + "Initiating Memory-to-Memory merge with " + noInMemorySegments + - " segments of total-size: " + mergeOutputSize); + LOG.info(inputContext.getInputOutputVertexNames() + ": " + "Initiating Memory-to-Memory
merge with " + + noInMemorySegments + " segments of total-size: " + mergeOutputSize); if (Thread.currentThread().isInterrupted()) { return; // early exit @@ -789,18 +799,17 @@ public void merge(List inputs) throws IOException, InterruptedExcepti // Nothing will be materialized to disk because the sort factor is being // set to the number of in memory segments. // TODO Is this doing any combination ? - TezRawKeyValueIterator rIter = + TezRawKeyValueIterator rIter = TezMerger.merge(conf, rfs, - ConfigUtils.getIntermediateInputKeyClass(conf), - ConfigUtils.getIntermediateInputValueClass(conf), + serializationContext, inMemorySegments, inMemorySegments.size(), new Path(inputContext.getUniqueIdentifier()), (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf), - progressable, null, null, null, null); + progressable, null, null, null, null); TezMerger.writeFile(rIter, writer, progressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT); writer.close(); - LOG.info(inputContext.getSourceVertexName() + + LOG.info(inputContext.getInputOutputVertexNames() + " Memory-to-Memory merge of the " + noInMemorySegments + " files in-memory complete with mergeOutputSize=" + mergeOutputSize); @@ -814,7 +823,7 @@ public void cleanup(List inputs, boolean deleteData) throws IOExcepti //No OP } } - + /** * Merges multiple in-memory segment to a disk segment */ @@ -831,11 +840,12 @@ private class InMemoryMerger extends MergeThread { public InMemoryMerger(MergeManager manager) { super(manager, Integer.MAX_VALUE, exceptionReporter); - setName("MemtoDiskMerger [" + TezUtilsInternal - .cleanVertexName(inputContext.getSourceVertexName()) + "]"); + setName("MemtoDiskMerger [" + TezUtilsInternal + .cleanVertexName(inputContext.getSourceVertexName()) + + "_" + inputContext.getUniqueIdentifier() + "]"); setDaemon(true); } - + @Override public void merge(List inputs) throws IOException, InterruptedException { if (inputs == null || inputs.size() == 0) { @@ -844,16 +854,16 @@ public void merge(List inputs) throws IOException, InterruptedExcepti numMemToDiskMerges.increment(1); inputContext.notifyProgress(); - - //name this output file same as the name of the first file that is + + //name this output file same as the name of the first file that is //there in the current list of inmem files (this is guaranteed to - //be absent on the disk currently. So we don't overwrite a prev. + //be absent on the disk currently. So we don't overwrite a prev. //created spill). Also we need to create the output file now since //it is not guaranteed that this file will be present after merge //is called (we delete empty files as soon as we see them //in the merge method) - //figure out the mapId + //figure out the mapId srcTaskIdentifier = inputs.get(0).getAttemptIdentifier(); List inMemorySegments = new ArrayList(); @@ -862,7 +872,7 @@ public void merge(List inputs) throws IOException, InterruptedExcepti int noInMemorySegments = inMemorySegments.size(); // TODO Maybe track serialized vs deserialized bytes. - + // All disk writes done by this merge are overhead - due to the lack of // adequate memory to keep all segments in memory. 
outputPath = mapOutputFile.getInputFileForWrite( @@ -873,20 +883,19 @@ public void merge(List inputs) throws IOException, InterruptedExcepti long outFileLen = 0; try { writer = - new Writer(conf, rfs, outputPath, - (Class)ConfigUtils.getIntermediateInputKeyClass(conf), - (Class)ConfigUtils.getIntermediateInputValueClass(conf), - codec, null, null); + new Writer(serializationContext.getKeySerialization(), + serializationContext.getValSerialization(), rfs, outputPath, + serializationContext.getKeyClass(), serializationContext.getValueClass(), codec, + null, null); TezRawKeyValueIterator rIter = null; - LOG.info("Initiating in-memory merge with " + noInMemorySegments + + LOG.info("Initiating in-memory merge with " + noInMemorySegments + " segments..."); tmpDir = new Path(inputContext.getUniqueIdentifier()); // Nothing actually materialized to disk - controlled by setting sort-factor to #segments. rIter = TezMerger.merge(conf, rfs, - (Class)ConfigUtils.getIntermediateInputKeyClass(conf), - (Class)ConfigUtils.getIntermediateInputValueClass(conf), + serializationContext, inMemorySegments, inMemorySegments.size(), tmpDir, (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf), progressable, spilledRecordsCounter, null, additionalBytesRead, null); @@ -910,8 +919,8 @@ public void merge(List inputs) throws IOException, InterruptedExcepti " files in-memory complete." + " Local file is " + outputPath + " of size " + outFileLen); - } catch (IOException e) { - //make sure that we delete the ondisk file that we created + } catch (IOException e) { + //make sure that we delete the ondisk file that we created //earlier when we invoked cloneFileAttributes localFS.delete(outputPath, true); throw e; @@ -952,11 +961,12 @@ class OnDiskMerger extends MergeThread { public OnDiskMerger(MergeManager manager) { super(manager, ioSortFactor, exceptionReporter); - setName("DiskToDiskMerger [" + TezUtilsInternal - .cleanVertexName(inputContext.getSourceVertexName()) + "]"); + setName("DiskToDiskMerger [" + TezUtilsInternal + .cleanVertexName(inputContext.getSourceVertexName()) + + "_" + inputContext.getUniqueIdentifier() + "]"); setDaemon(true); } - + @Override public void merge(List inputs) throws IOException, InterruptedException { // sanity check @@ -968,10 +978,10 @@ public void merge(List inputs) throws IOException, InterruptedExcepti inputContext.notifyProgress(); long approxOutputSize = 0; - int bytesPerSum = + int bytesPerSum = conf.getInt("io.bytes.per.checksum", 512); - - LOG.info("OnDiskMerger: We have " + inputs.size() + + + LOG.info("OnDiskMerger: We have " + inputs.size() + " map outputs on disk. Triggering merge..."); List inputSegments = new ArrayList(inputs.size()); @@ -994,7 +1004,7 @@ public void merge(List inputs) throws IOException, InterruptedExcepti } // add the checksum length - approxOutputSize += + approxOutputSize += ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum); // 2. 
Start the on-disk merge process @@ -1014,16 +1024,14 @@ public void merge(List inputs) throws IOException, InterruptedExcepti outputPath = localDirAllocator.getLocalPathForWrite(namePart, approxOutputSize, conf); outputPath = outputPath.suffix(Constants.MERGED_OUTPUT_PREFIX + mergeFileSequenceId.getAndIncrement()); - Writer writer = - new Writer(conf, rfs, outputPath, - (Class)ConfigUtils.getIntermediateInputKeyClass(conf), - (Class)ConfigUtils.getIntermediateInputValueClass(conf), - codec, null, null); + Writer writer = new Writer(serializationContext.getKeySerialization(), + serializationContext.getValSerialization(), rfs, outputPath, + serializationContext.getKeyClass(), serializationContext.getValueClass(), codec, null, + null); tmpDir = new Path(inputContext.getUniqueIdentifier()); try { TezRawKeyValueIterator iter = TezMerger.merge(conf, rfs, - (Class)ConfigUtils.getIntermediateInputKeyClass(conf), - (Class)ConfigUtils.getIntermediateInputValueClass(conf), + serializationContext, inputSegments, ioSortFactor, tmpDir, (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf), @@ -1044,10 +1052,10 @@ public void merge(List inputs) throws IOException, InterruptedExcepti final long outputLen = localFS.getFileStatus(outputPath).getLen(); closeOnDiskFile(new FileChunk(outputPath, 0, outputLen)); - LOG.info(inputContext.getSourceVertexName() + - " Finished merging " + inputs.size() + - " map output files on disk of total-size " + - approxOutputSize + "." + + LOG.info(inputContext.getInputOutputVertexNames() + + " Finished merging " + inputs.size() + + " map output files on disk of total-size " + + approxOutputSize + "." + " Local output file is " + outputPath + " of size " + outputLen); } @@ -1066,9 +1074,9 @@ public void cleanup(List inputs, boolean deleteData) throws IOExcepti } } } - + private long createInMemorySegments(List inMemoryMapOutputs, - List inMemorySegments, + List inMemorySegments, long leaveBytes ) throws IOException { long totalSize = 0L; @@ -1085,11 +1093,11 @@ private long createInMemorySegments(List inMemoryMapOutputs, long size = data.length; totalSize += size; fullSize -= size; - IFile.Reader reader = new InMemoryReader(MergeManager.this, + IFile.Reader reader = new InMemoryReader(MergeManager.this, mo.getAttemptIdentifier(), data, 0, (int)size); inMemorySegments.add(new Segment(reader, - (mo.isPrimaryMapOutput() ? + (mo.isPrimaryMapOutput() ? mergedMapOutputsCounter : null))); } // Bulk remove removed in-memory map outputs efficiently @@ -1117,7 +1125,7 @@ public KeyState readRawKey(DataInputBuffer key) throws IOException { final int klen = kb.getLength() - kp; key.reset(kb.getData(), kp, klen); bytesRead += klen; - return KeyState.NEW_KEY; + return kvIter.isSameKey() ? 
KeyState.SAME_KEY : KeyState.NEW_KEY; } return KeyState.NO_KEY; } @@ -1148,12 +1156,11 @@ private TezRawKeyValueIterator finalMerge(Configuration job, FileSystem fs, logFinalMergeStart(inMemoryMapOutputs, onDiskMapOutputs); StringBuilder finalMergeLog = new StringBuilder(); - + inputContext.notifyProgress(); // merge config params - Class keyClass = (Class)ConfigUtils.getIntermediateInputKeyClass(job); - Class valueClass = (Class)ConfigUtils.getIntermediateInputValueClass(job); + SerializationContext serContext = new SerializationContext(job); final Path tmpDir = new Path(inputContext.getUniqueIdentifier()); final RawComparator comparator = (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(job); @@ -1170,26 +1177,27 @@ private TezRawKeyValueIterator finalMerge(Configuration job, FileSystem fs, final int numMemDiskSegments = memDiskSegments.size(); if (numMemDiskSegments > 0 && ioSortFactor > onDiskMapOutputs.size()) { - + // If we reach here, it implies that we have less than io.sort.factor - // disk segments and this will be incremented by 1 (result of the - // memory segments merge). Since this total would still be + // disk segments and this will be incremented by 1 (result of the + // memory segments merge). Since this total would still be // <= io.sort.factor, we will not do any more intermediate merges, // the merge of all these disk segments would be directly fed to the // reduce method - + mergePhaseFinished = true; // must spill to disk, but can't retain in-mem for intermediate merge // Can not use spill id in final merge as it would clobber with other files, hence using // Integer.MAX_VALUE - final Path outputPath = + final Path outputPath = mapOutputFile.getInputFileForWrite(srcTaskId, Integer.MAX_VALUE, inMemToDiskBytes).suffix(Constants.MERGED_OUTPUT_PREFIX); - final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs, keyClass, valueClass, + final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs, serContext, memDiskSegments, numMemDiskSegments, tmpDir, comparator, progressable, spilledRecordsCounter, null, additionalBytesRead, null); - final Writer writer = new Writer(job, fs, outputPath, - keyClass, valueClass, codec, null, null); + final Writer writer = new Writer(serContext.getKeySerialization(), + serContext.getValSerialization(), fs, outputPath, serContext.getKeyClass(), + serContext.getValueClass(), codec, null, null); try { TezMerger.writeFile(rIter, writer, progressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT); } catch (IOException e) { @@ -1274,7 +1282,7 @@ public int compare(Segment o1, Segment o2) { // build final list of segments from merged backed by disk + in-mem List finalSegments = new ArrayList(); - long inMemBytes = createInMemorySegments(inMemoryMapOutputs, + long inMemBytes = createInMemorySegments(inMemoryMapOutputs, finalSegments, 0); if (LOG.isInfoEnabled()) { finalMergeLog.append(". 
MemSeg: " + finalSegments.size() + ", " + inMemBytes); @@ -1289,7 +1297,7 @@ public int compare(Segment o1, Segment o2) { diskSegments.addAll(0, memDiskSegments); memDiskSegments.clear(); TezRawKeyValueIterator diskMerge = TezMerger.merge( - job, fs, keyClass, valueClass, codec, diskSegments, + job, fs, serContext, codec, diskSegments, ioSortFactor, numInMemSegments, tmpDir, comparator, progressable, false, spilledRecordsCounter, null, additionalBytesRead, null); diskSegments.clear(); @@ -1303,10 +1311,10 @@ public int compare(Segment o1, Segment o2) { LOG.info(finalMergeLog.toString()); } // This is doing nothing but creating an iterator over the segments. - return TezMerger.merge(job, fs, keyClass, valueClass, - finalSegments, finalSegments.size(), tmpDir, - comparator, progressable, spilledRecordsCounter, null, - additionalBytesRead, null); + return TezMerger.merge(job, fs, serContext, codec, + finalSegments, finalSegments.size(), tmpDir, + comparator, progressable, spilledRecordsCounter, null, + additionalBytesRead, null); } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeThread.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeThread.java index 52b4c5bbe9..c0af90fde0 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeThread.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/MergeThread.java @@ -36,6 +36,8 @@ abstract class MergeThread extends Thread { private final ExceptionReporter reporter; private boolean closed = false; private final int mergeFactor; + + private Thread shuffleSchedulerThread; public MergeThread(MergeManager manager, int mergeFactor, ExceptionReporter reporter) { @@ -60,6 +62,10 @@ public synchronized void close() throws InterruptedException { } } + public void setParentThread(Thread shuffleSchedulerThread) { + this.shuffleSchedulerThread = shuffleSchedulerThread; + } + public synchronized boolean isInProgress() { return inProgress; } @@ -81,7 +87,11 @@ public synchronized void startMerge(Set inputs) { public synchronized void waitForMerge() throws InterruptedException { while (inProgress) { - wait(); + if (shuffleSchedulerThread != null + && !shuffleSchedulerThread.isAlive()) { + return; + } + wait(5000); } } @@ -91,7 +101,11 @@ public void run() { // Wait for notification to start the merge... 
synchronized (this) { while (!inProgress) { - wait(); + if (shuffleSchedulerThread != null + && !shuffleSchedulerThread.isAlive()) { + return; + } + wait(5000); } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java index 0089d8c1fe..9881c6e99a 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/Shuffle.java @@ -39,9 +39,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.DefaultCodec; -import org.apache.hadoop.util.ReflectionUtils; import org.apache.tez.common.CallableWithNdc; +import org.apache.tez.common.GuavaShim; import org.apache.tez.common.TezRuntimeFrameworkConfigs; import org.apache.tez.common.TezUtilsInternal; import org.apache.tez.common.counters.TaskCounter; @@ -50,13 +49,12 @@ import org.apache.tez.runtime.api.Event; import org.apache.tez.runtime.api.InputContext; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; -import org.apache.tez.runtime.library.common.ConfigUtils; import org.apache.tez.runtime.library.common.TezRuntimeUtils; import org.apache.tez.runtime.library.common.combine.Combiner; import org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator; import org.apache.tez.runtime.library.exceptions.InputAlreadyClosedException; - -import com.google.common.base.Preconditions; +import org.apache.tez.runtime.library.utils.CodecUtils; +import org.apache.tez.common.Preconditions; import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; @@ -71,13 +69,13 @@ @InterfaceAudience.Private @InterfaceStability.Unstable public class Shuffle implements ExceptionReporter { - + private static final Logger LOG = LoggerFactory.getLogger(Shuffle.class); private static final int PROGRESS_FREQUENCY = 2000; - + private final Configuration conf; private final InputContext inputContext; - + private final ShuffleInputEventHandlerOrderedGrouped eventHandler; @VisibleForTesting final ShuffleScheduler scheduler; @@ -94,9 +92,9 @@ public class Shuffle implements ExceptionReporter { private final RunShuffleCallable runShuffleCallable; private volatile ListenableFuture runShuffleFuture; private final ListeningExecutorService executor; - - private final String srcNameTrimmed; - + + private final String sourceDestNameTrimmed; + private AtomicBoolean isShutDown = new AtomicBoolean(false); private AtomicBoolean fetchersClosed = new AtomicBoolean(false); private AtomicBoolean schedulerClosed = new AtomicBoolean(false); @@ -111,18 +109,11 @@ public Shuffle(InputContext inputContext, Configuration conf, int numInputs, this.inputContext = inputContext; this.conf = conf; - this.srcNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName()); - + this.sourceDestNameTrimmed = TezUtilsInternal.cleanVertexName(inputContext.getSourceVertexName()) + " -> " + + TezUtilsInternal.cleanVertexName(inputContext.getTaskVertexName()); + + this.codec = CodecUtils.getCodec(conf); - if (ConfigUtils.isIntermediateInputCompressed(conf)) { - Class codecClass = - 
ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class); - codec = ReflectionUtils.newInstance(codecClass, conf); - // Work around needed for HADOOP-12191. Avoids the native initialization synchronization race - codec.getDecompressorType(); - } else { - codec = null; - } this.ifileReadAhead = conf.getBoolean( TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT); @@ -133,11 +124,11 @@ public Shuffle(InputContext inputContext, Configuration conf, int numInputs, } else { this.ifileReadAheadLength = 0; } - + Combiner combiner = TezRuntimeUtils.instantiateCombiner(conf, inputContext); - + FileSystem localFS = FileSystem.getLocal(this.conf); - LocalDirAllocator localDirAllocator = + LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); // TODO TEZ Get rid of Map / Reduce references. @@ -148,7 +139,7 @@ public Shuffle(InputContext inputContext, Configuration conf, int numInputs, TezCounter mergedMapOutputsCounter = inputContext.getCounters().findCounter(TaskCounter.MERGED_MAP_OUTPUTS); - LOG.info(srcNameTrimmed + ": " + "Shuffle assigned with " + numInputs + " inputs" + ", codec: " + LOG.info(sourceDestNameTrimmed + ": " + "Shuffle assigned with " + numInputs + " inputs" + ", codec: " + (codec == null ? "None" : codec.getClass().getName()) + ", ifileReadAhead: " + ifileReadAhead); @@ -179,7 +170,7 @@ public Shuffle(InputContext inputContext, Configuration conf, int numInputs, codec, ifileReadAhead, ifileReadAheadLength, - srcNameTrimmed); + sourceDestNameTrimmed); this.mergePhaseTime = inputContext.getCounters().findCounter(TaskCounter.MERGE_PHASE_TIME); this.shufflePhaseTime = inputContext.getCounters().findCounter(TaskCounter.SHUFFLE_PHASE_TIME); @@ -190,9 +181,9 @@ public Shuffle(InputContext inputContext, Configuration conf, int numInputs, inputContext, scheduler, ShuffleUtils.isTezShuffleHandler(conf)); - + ExecutorService rawExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder() - .setDaemon(true).setNameFormat("ShuffleAndMergeRunner {" + srcNameTrimmed + "}").build()); + .setDaemon(true).setNameFormat("ShuffleAndMergeRunner {" + sourceDestNameTrimmed + "}").build()); executor = MoreExecutors.listeningDecorator(rawExecutor); @@ -203,17 +194,17 @@ public void handleEvents(List events) throws IOException { if (!isShutDown.get()) { eventHandler.handleEvents(events); } else { - LOG.info(srcNameTrimmed + ": " + "Ignoring events since already shutdown. EventCount: " + events.size()); + LOG.info(sourceDestNameTrimmed + ": " + "Ignoring events since already shutdown. EventCount: " + events.size()); } } - + /** * Indicates whether the Shuffle and Merge processing is complete. * @return false if not complete, true if complete or if an error occurred. 
- * @throws InterruptedException - * @throws IOException - * @throws InputAlreadyClosedException + * @throws InterruptedException + * @throws IOException + * @throws InputAlreadyClosedException */ public boolean isInputReady() throws IOException, InterruptedException, TezException { if (isShutDown.get()) { @@ -270,14 +261,14 @@ public TezRawKeyValueIterator waitForInput() throws IOException, InterruptedExce public void run() throws IOException { merger.configureAndStart(); runShuffleFuture = executor.submit(runShuffleCallable); - Futures.addCallback(runShuffleFuture, new ShuffleRunnerFutureCallback()); + Futures.addCallback(runShuffleFuture, new ShuffleRunnerFutureCallback(), GuavaShim.directExecutor()); executor.shutdown(); } public void shutdown() { if (!isShutDown.getAndSet(true)) { // Interrupt so that the scheduler / merger sees this interrupt. - LOG.info("Shutting down Shuffle for source: " + srcNameTrimmed); + LOG.info("Shutting down Shuffle for source: " + sourceDestNameTrimmed); runShuffleFuture.cancel(true); cleanupIgnoreErrors(); } @@ -317,12 +308,12 @@ protected TezRawKeyValueIterator callInternal() throws IOException, InterruptedE try { kvIter = merger.close(true); } catch (Throwable e) { - // Set the throwable so that future.get() sees the reported errror. + // Set the throwable so that future.get() sees the reported error. throwable.set(e); throw new ShuffleError("Error while doing final merge ", e); } mergePhaseTime.setValue(System.currentTimeMillis() - startTime); - + inputContext.notifyProgress(); // Sanity check synchronized (Shuffle.this) { @@ -333,7 +324,7 @@ protected TezRawKeyValueIterator callInternal() throws IOException, InterruptedE } inputContext.inputIsReady(); - LOG.info("merge complete for input vertex : " + srcNameTrimmed); + LOG.info("merge complete for input vertex : " + sourceDestNameTrimmed); return kvIter; } } @@ -343,7 +334,8 @@ private void cleanupShuffleSchedulerIgnoreErrors() { cleanupShuffleScheduler(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); - LOG.info(srcNameTrimmed + ": " + "Interrupted while attempting to close the scheduler during cleanup. Ignoring"); + LOG.info(sourceDestNameTrimmed + ": " + + "Interrupted while attempting to close the scheduler during cleanup. Ignoring"); } } @@ -361,13 +353,14 @@ private void cleanupMerger(boolean ignoreErrors) throws Throwable { if (ignoreErrors) { //Reset the status Thread.currentThread().interrupt(); - LOG.info(srcNameTrimmed + ": " + "Interrupted while attempting to close the merger during cleanup. Ignoring"); + LOG.info(sourceDestNameTrimmed + ": " + + "Interrupted while attempting to close the merger during cleanup. Ignoring"); } else { throw e; } } catch (Throwable e) { if (ignoreErrors) { - LOG.info(srcNameTrimmed + ": " + "Exception while trying to shutdown merger, Ignoring", e); + LOG.info(sourceDestNameTrimmed + ": " + "Exception while trying to shutdown merger, Ignoring", e); } else { throw e; } @@ -389,7 +382,7 @@ private void cleanupIgnoreErrors() { } cleanupMerger(true); } catch (Throwable t) { - LOG.info(srcNameTrimmed + ": " + "Error in cleaning up.., ", t); + LOG.info(sourceDestNameTrimmed + ": " + "Error in cleaning up.., ", t); } } @@ -398,11 +391,11 @@ private void cleanupIgnoreErrors() { public synchronized void reportException(Throwable t) { // RunShuffleCallable onFailure deals with ignoring errors on shutdown. 
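The run() hunk above passes GuavaShim.directExecutor() as a third argument to Futures.addCallback. Newer Guava versions require the executor to be explicit; a direct executor preserves the old run-on-the-completing-thread behavior. A minimal sketch against plain Guava (GuavaShim is Tez's compatibility layer, not used here):

import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.MoreExecutors;

class CallbackDemo {
    static void attach(ListenableFuture<String> future) {
        Futures.addCallback(future, new FutureCallback<String>() {
            @Override public void onSuccess(String result) { System.out.println("done: " + result); }
            @Override public void onFailure(Throwable t) { t.printStackTrace(); }
        }, MoreExecutors.directExecutor()); // explicit executor, required by newer Guava
    }
}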
if (throwable.get() == null) { - LOG.info(srcNameTrimmed + ": " + "Setting throwable in reportException with message [" + t.getMessage() + + LOG.info(sourceDestNameTrimmed + ": " + "Setting throwable in reportException with message [" + t.getMessage() + "] from thread [" + Thread.currentThread().getName()); throwable.set(t); throwingThreadName = Thread.currentThread().getName(); - // Notify the scheduler so that the reporting thread finds the + // Notify the scheduler so that the reporting thread finds the // exception immediately. cleanupShuffleSchedulerIgnoreErrors(); } @@ -416,7 +409,7 @@ public synchronized void killSelf(Exception exception, String message) { inputContext.killSelf(exception, message); } } - + public static class ShuffleError extends IOException { private static final long serialVersionUID = 5753909320586607881L; @@ -429,19 +422,19 @@ public static class ShuffleError extends IOException { public static long getInitialMemoryRequirement(Configuration conf, long maxAvailableTaskMemory) { return MergeManager.getInitialMemoryRequirement(conf, maxAvailableTaskMemory); } - + private class ShuffleRunnerFutureCallback implements FutureCallback { @Override public void onSuccess(TezRawKeyValueIterator result) { - LOG.info(srcNameTrimmed + ": " + "Shuffle Runner thread complete"); + LOG.info(sourceDestNameTrimmed + ": " + "Shuffle Runner thread complete"); } @Override public void onFailure(Throwable t) { if (isShutDown.get()) { - LOG.info(srcNameTrimmed + ": " + "Already shutdown. Ignoring error"); + LOG.info(sourceDestNameTrimmed + ": " + "Already shutdown. Ignoring error"); } else { - LOG.error(srcNameTrimmed + ": " + "ShuffleRunner failed with error", t); + LOG.error(sourceDestNameTrimmed + ": " + "ShuffleRunner failed with error", t); // In case of an abort / Interrupt - the runtime makes sure that this is ignored. 
inputContext.reportFailure(TaskFailureType.NON_FATAL, t, "Shuffle Runner Failed"); cleanupIgnoreErrors(); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleHeader.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleHeader.java index 9f883dbfdc..f074e897e8 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleHeader.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleHeader.java @@ -103,4 +103,11 @@ public void write(DataOutput out) throws IOException { WritableUtils.writeVLong(out, uncompressedLength); WritableUtils.writeVInt(out, forReduce); } + + @Override + public String toString() { + return String.format( + "ShuffleHeader [mapId=%s, uncompressedLength=%d, compressedLength=%d, forReduce=%d]", mapId, + uncompressedLength, compressedLength, forReduce); + } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java index 116098fe26..2088c28bf4 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleInputEventHandlerOrderedGrouped.java @@ -30,8 +30,6 @@ import org.apache.tez.runtime.library.common.shuffle.ShuffleEventHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.util.StringInterner; -import org.apache.hadoop.conf.Configuration; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezUtilsInternal; import org.apache.tez.dag.api.TezUncheckedException; @@ -42,11 +40,12 @@ import org.apache.tez.runtime.library.common.InputAttemptIdentifier; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto; +import org.apache.tez.util.StringInterner; import com.google.protobuf.InvalidProtocolBufferException; public class ShuffleInputEventHandlerOrderedGrouped implements ShuffleEventHandler { - + private static final Logger LOG = LoggerFactory.getLogger(ShuffleInputEventHandlerOrderedGrouped.class); private final ShuffleScheduler scheduler; @@ -77,7 +76,7 @@ public void handleEvents(List events) throws IOException { @Override public void logProgress(boolean updateOnClose) { - LOG.info(inputContext.getSourceVertexName() + ": " + LOG.info(inputContext.getInputOutputVertexNames() + ": " + "numDmeEventsSeen=" + numDmeEvents.get() + ", numDmeEventsSeenWithNoData=" + numDmeEventsNoData.get() + ", numObsoletionEventsSeen=" + numObsoletionEvents.get() @@ -156,11 +155,8 @@ private void processDataMovementEvent(DataMovementEvent dmEvent, DataMovementEve if (shufflePayload.hasEmptyPartitions()) { try { if (emptyPartitionsBitSet.get(partitionId)) { - if (LOG.isDebugEnabled()) { - LOG.debug( - "Source partition: " + partitionId + " did not generate any data. SrcAttempt: [" - + srcAttemptIdentifier + "]. Not fetching."); - } + LOG.debug("Source partition: {} did not generate any data. SrcAttempt: [{}]. 
Not fetching.", + partitionId, srcAttemptIdentifier); numDmeEventsNoData.getAndIncrement(); scheduler.copySucceeded(srcAttemptIdentifier.expand(0), null, 0, 0, 0, null, true); return; @@ -170,7 +166,7 @@ private void processDataMovementEvent(DataMovementEvent dmEvent, DataMovementEve } } - scheduler.addKnownMapOutput(StringInterner.weakIntern(shufflePayload.getHost()), shufflePayload.getPort(), + scheduler.addKnownMapOutput(StringInterner.intern(shufflePayload.getHost()), shufflePayload.getPort(), partitionId, srcAttemptIdentifier); } @@ -191,10 +187,8 @@ private void processCompositeRoutedDataMovementEvent(CompositeRoutedDataMovement allPartitionsEmpty &= emptyPartitionsBitSet.get(srcPartitionId); if (emptyPartitionsBitSet.get(srcPartitionId)) { InputAttemptIdentifier srcInputAttemptIdentifier = compositeInputAttemptIdentifier.expand(i); - if (LOG.isDebugEnabled()) { - LOG.debug("Source partition: " + srcPartitionId + " did not generate any data. SrcAttempt: [" - + srcInputAttemptIdentifier + "]. Not fetching."); - } + LOG.debug("Source partition: {} did not generate any data. SrcAttempt: [{}]. Not fetching.", + srcPartitionId, srcInputAttemptIdentifier); numDmeEventsNoData.getAndIncrement(); scheduler.copySucceeded(srcInputAttemptIdentifier, null, 0, 0, 0, null, true); } @@ -205,16 +199,14 @@ private void processCompositeRoutedDataMovementEvent(CompositeRoutedDataMovement } } - scheduler.addKnownMapOutput(StringInterner.weakIntern(shufflePayload.getHost()), shufflePayload.getPort(), + scheduler.addKnownMapOutput(StringInterner.intern(shufflePayload.getHost()), shufflePayload.getPort(), partitionId, compositeInputAttemptIdentifier); } private void processTaskFailedEvent(InputFailedEvent ifEvent) { InputAttemptIdentifier taIdentifier = new InputAttemptIdentifier(ifEvent.getTargetIndex(), ifEvent.getVersion()); scheduler.obsoleteInput(taIdentifier); - if (LOG.isDebugEnabled()) { - LOG.debug("Obsoleting output of src-task: " + taIdentifier); - } + LOG.debug("Obsoleting output of src-task: {}", taIdentifier); } /** @@ -228,7 +220,9 @@ private void processTaskFailedEvent(InputFailedEvent ifEvent) { */ private CompositeInputAttemptIdentifier constructInputAttemptIdentifier(int targetIndex, int targetIndexCount, int version, DataMovementEventPayloadProto shufflePayload) { - String pathComponent = (shufflePayload.hasPathComponent()) ? StringInterner.weakIntern(shufflePayload.getPathComponent()) : null; + String pathComponent = (shufflePayload.hasPathComponent()) + ? 
StringInterner.intern(shufflePayload.getPathComponent()) + : null; int spillEventId = shufflePayload.getSpillId(); CompositeInputAttemptIdentifier srcAttemptIdentifier = null; if (shufflePayload.hasSpillId()) { diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleScheduler.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleScheduler.java index b223c1a6cd..3fc7d63059 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleScheduler.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/ShuffleScheduler.java @@ -43,7 +43,8 @@ import java.util.concurrent.atomic.AtomicLong; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.GuavaShim; +import org.apache.tez.common.Preconditions; import com.google.common.collect.LinkedListMultimap; import com.google.common.collect.ListMultimap; import com.google.common.collect.Maps; @@ -60,11 +61,12 @@ import org.apache.tez.http.HttpConnectionParams; import org.apache.tez.common.CallableWithNdc; import org.apache.tez.common.security.JobTokenSecretManager; -import org.apache.tez.dag.api.TezConstants; import org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.hadoop.io.IntWritable; import org.apache.tez.common.TezUtilsInternal; import org.apache.tez.common.counters.TaskCounter; @@ -78,6 +80,7 @@ import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils.FetchStatsLogger; import org.apache.tez.runtime.library.common.shuffle.HostPort; +import org.apache.tez.runtime.library.common.shuffle.InputAttemptFetchFailure; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapHost.HostPortPartition; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput.Type; @@ -175,6 +178,7 @@ enum ShuffleErrors { private final Referee referee; @VisibleForTesting final Map failureCounts = new HashMap(); + final Set uniqueHosts = Sets.newHashSet(); private final Map hostFailures = new HashMap(); private final InputContext inputContext; @@ -214,11 +218,10 @@ enum ShuffleErrors { private final int ifileReadAheadLength; private final CompressionCodec codec; private final Configuration conf; + private final RawLocalFileSystem localFs; private final boolean localDiskFetchEnabled; private final String localHostname; private final int shufflePort; - private final String applicationId; - private final int dagId; private final boolean asyncHttp; private final boolean sslShuffle; @@ -243,8 +246,9 @@ enum ShuffleErrors { private final boolean checkFailedFetchSinceLastCompletion; private final boolean verifyDiskChecksum; private final boolean compositeFetch; - + private final boolean enableFetcherTestingErrors; private volatile Thread shuffleSchedulerThread = null; + private final int maxPenaltyTime; private long totalBytesShuffledTillNow = 0; private final DecimalFormat mbpsFormat = new DecimalFormat("0.00"); @@ -262,6 +266,7 @@ public ShuffleScheduler(InputContext inputContext, String srcNameTrimmed) throws IOException { this.inputContext = inputContext; 
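The event-handler hunks above collapse if (LOG.isDebugEnabled()) guards around string-concatenated messages into parameterized LOG.debug(...) calls. SLF4J only renders the message when DEBUG is enabled, so the guard is redundant unless computing an argument is itself expensive. A short sketch contrasting the two cases (class and method names are illustrative):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

class LogStyle {
    private static final Logger LOG = LoggerFactory.getLogger(LogStyle.class);

    void report(int partitionId, Object srcAttempt) {
        // No guard needed: formatting happens only if DEBUG is enabled.
        LOG.debug("Source partition: {} did not generate any data. SrcAttempt: [{}]",
            partitionId, srcAttempt);

        // Keep a guard only when building an argument is itself costly.
        if (LOG.isDebugEnabled()) {
            LOG.debug("expensive state: {}", buildExpensiveDump());
        }
    }

    private String buildExpensiveDump() { return "..."; }
}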
this.conf = conf; + this.localFs = (RawLocalFileSystem) FileSystem.getLocal(conf).getRaw(); this.exceptionReporter = exceptionReporter; this.allocator = allocator; this.mergeManager = mergeManager; @@ -331,8 +336,6 @@ public ShuffleScheduler(InputContext inputContext, TezRuntimeConfiguration .TEZ_RUNTIME_SHUFFLE_FAILED_CHECK_SINCE_LAST_COMPLETION_DEFAULT); - this.applicationId = inputContext.getApplicationId().toString(); - this.dagId = inputContext.getDagIdentifier(); this.localHostname = inputContext.getExecutionContext().getHostName(); String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT); @@ -393,13 +396,13 @@ public ShuffleScheduler(InputContext inputContext, this.maxFailedUniqueFetches = Math.min(numberOfInputs, 5); referee.start(); - this.maxFetchFailuresBeforeReporting = + this.maxFetchFailuresBeforeReporting = conf.getInt( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT_DEFAULT); - this.reportReadErrorImmediately = + this.reportReadErrorImmediately = conf.getBoolean( - TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR, + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_NOTIFY_READERROR_DEFAULT); this.verifyDiskChecksum = conf.getBoolean( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_VERIFY_DISK_CHECKSUM, @@ -412,11 +415,17 @@ public ShuffleScheduler(InputContext inputContext, this.maxTaskOutputAtOnce = Math.max(1, Math.min(75, conf.getInt( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_MAX_TASK_OUTPUT_AT_ONCE, TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_MAX_TASK_OUTPUT_AT_ONCE_DEFAULT))); - + this.skippedInputCounter = inputContext.getCounters().findCounter(TaskCounter.NUM_SKIPPED_INPUTS); this.firstEventReceived = inputContext.getCounters().findCounter(TaskCounter.FIRST_EVENT_RECEIVED); this.lastEventReceived = inputContext.getCounters().findCounter(TaskCounter.LAST_EVENT_RECEIVED); this.compositeFetch = ShuffleUtils.isTezShuffleHandler(conf); + this.maxPenaltyTime = conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_HOST_PENALTY_TIME_LIMIT_MS, + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_HOST_PENALTY_TIME_LIMIT_MS_DEFAULT); + + this.enableFetcherTestingErrors = + conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_ENABLE_TESTING_ERRORS, + TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_ENABLE_TESTING_ERRORS_DEFAULT); pipelinedShuffleInfoEventsMap = Maps.newConcurrentMap(); LOG.info("ShuffleScheduler running for sourceVertex: " @@ -433,11 +442,14 @@ public ShuffleScheduler(InputContext inputContext, + ", maxStallTimeFraction=" + maxStallTimeFraction + ", minReqProgressFraction=" + minReqProgressFraction + ", checkFailedFetchSinceLastCompletion=" + checkFailedFetchSinceLastCompletion + + ", asyncHttp=" + asyncHttp + + ", enableFetcherTestingErrors=" + enableFetcherTestingErrors ); } public void start() throws Exception { shuffleSchedulerThread = Thread.currentThread(); + mergeManager.setupParentThread(shuffleSchedulerThread); ShuffleSchedulerCallable schedulerCallable = new ShuffleSchedulerCallable(); schedulerCallable.call(); } @@ -657,7 +669,7 @@ public synchronized void copySucceeded(InputAttemptIdentifier srcAttemptIdentifi if (remainingMaps.get() == 0) { notifyAll(); // Notify the getHost() method. 
- LOG.info("All inputs fetched for input vertex : " + inputContext.getSourceVertexName()); + LOG.info("All inputs fetched for input vertex : " + inputContext.getInputOutputVertexNames()); } // update the status @@ -677,7 +689,7 @@ public synchronized void copySucceeded(InputAttemptIdentifier srcAttemptIdentifi LOG.warn("Duplicate fetch of input no longer needs to be fetched: " + srcAttemptIdentifier); // free the resource - specially memory - + // If the src does not generate data, output will be null. if (output != null) { output.abort(); @@ -747,23 +759,22 @@ private void logProgress() { } } - public synchronized void copyFailed(InputAttemptIdentifier srcAttempt, - MapHost host, - boolean readError, - boolean connectError, - boolean isLocalFetch) { + public void copyFailed(InputAttemptFetchFailure fetchFailure, MapHost host, + boolean readError, boolean connectError) { failedShuffleCounter.increment(1); inputContext.notifyProgress(); - int failures = incrementAndGetFailureAttempt(srcAttempt); + int failures; - if (!isLocalFetch) { - /** - * Track the number of failures that has happened since last completion. - * This gets reset on a successful copy. - */ - failedShufflesSinceLastCompletion++; + synchronized (this) { + failures = incrementAndGetFailureAttempt(fetchFailure.getInputAttemptIdentifier()); + if (!fetchFailure.isLocalFetch()) { + /** + * Track the number of failures that has happened since last completion. + * This gets reset on a successful copy. + */ + failedShufflesSinceLastCompletion++; + } } - /** * Inform AM: * - In case of read/connect error @@ -781,18 +792,22 @@ public synchronized void copyFailed(InputAttemptIdentifier srcAttempt, if (shouldInformAM) { //Inform AM. In case producer needs to be restarted, it is handled at AM. - informAM(srcAttempt); + informAM(fetchFailure); } //Restart consumer in case shuffle is not healthy - if (!isShuffleHealthy(srcAttempt)) { + try { + checkShuffleHealthy(fetchFailure); + } catch (IOException e) { + // reportException should be called outside synchronized(this) due to TEZ-4334 + exceptionReporter.reportException(e); return; } penalizeHost(host, failures); } - private boolean isAbortLimitExceeedFor(InputAttemptIdentifier srcAttempt) { + private void isAbortLimitExceeedFor(InputAttemptIdentifier srcAttempt) throws IOException { int attemptFailures = getFailureCount(srcAttempt); if (attemptFailures >= abortFailureLimit) { // This task has seen too many fetch failures - report it as failed. The @@ -807,15 +822,11 @@ private boolean isAbortLimitExceeedFor(InputAttemptIdentifier srcAttempt) { inputContext.getSourceVertexName(), srcAttempt.getInputIdentifier(), srcAttempt.getAttemptNumber()) + ". 
threshold=" + abortFailureLimit; - IOException ioe = new IOException(errorMsg); - // Shuffle knows how to deal with failures post shutdown via the onFailure hook - exceptionReporter.reportException(ioe); - return true; + throw new IOException(errorMsg); } - return false; } - private void penalizeHost(MapHost host, int failures) { + private synchronized void penalizeHost(MapHost host, int failures) { host.penalize(); HostPort hostPort = new HostPort(host.getHost(), host.getPort()); @@ -831,7 +842,8 @@ private void penalizeHost(MapHost host, int failures) { long delay = (long) (INITIAL_PENALTY * Math.pow(PENALTY_GROWTH_RATE, failures)); - penalties.add(new Penalty(host, delay)); + long penaltyDelay = Math.min(delay, maxPenaltyTime); + penalties.add(new Penalty(host, penaltyDelay)); } private int getFailureCount(InputAttemptIdentifier srcAttempt) { @@ -859,21 +871,24 @@ public void reportLocalError(IOException ioe) { } // Notify AM - private void informAM(InputAttemptIdentifier srcAttempt) { + private void informAM(InputAttemptFetchFailure fetchFailure) { + InputAttemptIdentifier srcAttempt = fetchFailure.getInputAttemptIdentifier(); LOG.info( - srcNameTrimmed + ": " + "Reporting fetch failure for InputIdentifier: " - + srcAttempt + " taskAttemptIdentifier: " + TezRuntimeUtils - .getTaskAttemptIdentifier(inputContext.getSourceVertexName(), - srcAttempt.getInputIdentifier(), - srcAttempt.getAttemptNumber()) + " to AM."); + "{}: Reporting fetch failure for InputIdentifier: {} taskAttemptIdentifier: {}, " + + "local fetch: {}, remote fetch failure reported as local failure: {}) to AM.", + srcNameTrimmed, srcAttempt, + TezRuntimeUtils.getTaskAttemptIdentifier(inputContext.getSourceVertexName(), + srcAttempt.getInputIdentifier(), srcAttempt.getAttemptNumber()), + fetchFailure.isLocalFetch(), fetchFailure.isDiskErrorAtSource()); List failedEvents = Lists.newArrayListWithCapacity(1); - failedEvents.add(InputReadErrorEvent.create( - "Fetch failure for " + TezRuntimeUtils - .getTaskAttemptIdentifier(inputContext.getSourceVertexName(), - srcAttempt.getInputIdentifier(), - srcAttempt.getAttemptNumber()) + " to jobtracker.", - srcAttempt.getInputIdentifier(), - srcAttempt.getAttemptNumber())); + failedEvents.add( + InputReadErrorEvent.create( + "Fetch failure for " + + TezRuntimeUtils.getTaskAttemptIdentifier(inputContext.getSourceVertexName(), + srcAttempt.getInputIdentifier(), srcAttempt.getAttemptNumber()) + + " to jobtracker.", + srcAttempt.getInputIdentifier(), srcAttempt.getAttemptNumber(), + fetchFailure.isLocalFetch(), fetchFailure.isDiskErrorAtSource(), localHostname)); inputContext.sendEvents(failedEvents); } @@ -995,37 +1010,38 @@ private boolean isFetcherHealthy(String logContext) { return fetcherHealthy; } - boolean isShuffleHealthy(InputAttemptIdentifier srcAttempt) { - - if (isAbortLimitExceeedFor(srcAttempt)) { - return false; - } - - final float MIN_REQUIRED_PROGRESS_PERCENT = minReqProgressFraction; - final float MAX_ALLOWED_STALL_TIME_PERCENT = maxStallTimeFraction; + /** + * This method checks if the current shuffle is healthy and throw IOException if it's not, + * then the caller is supposed to handle the IOException. 
+ */ + private synchronized void checkShuffleHealthy(InputAttemptFetchFailure fetchFailure) + throws IOException { + InputAttemptIdentifier srcAttempt = fetchFailure.getInputAttemptIdentifier(); + // supposed to throw IOException if exceeded + isAbortLimitExceeedFor(srcAttempt); int doneMaps = numInputs - remainingMaps.get(); String logContext = "srcAttempt=" + srcAttempt.toString(); boolean fetcherHealthy = isFetcherHealthy(logContext); - + // check if the reducer has progressed enough boolean reducerProgressedEnough = (((float)doneMaps / numInputs) - >= MIN_REQUIRED_PROGRESS_PERCENT); + >= minReqProgressFraction); // check if the reducer is stalled for a long time // duration for which the reducer is stalled int stallDuration = (int)(System.currentTimeMillis() - lastProgressTime); - + // duration for which the reducer ran with progress int shuffleProgressDuration = (int)(lastProgressTime - startTime); boolean reducerStalled = (shuffleProgressDuration > 0) && (((float)stallDuration / shuffleProgressDuration) - >= MAX_ALLOWED_STALL_TIME_PERCENT); + >= maxStallTimeFraction); // kill if not healthy and has insufficient progress if ((failureCounts.size() >= maxFailedUniqueFetches || @@ -1033,21 +1049,21 @@ boolean isShuffleHealthy(InputAttemptIdentifier srcAttempt) { && !fetcherHealthy && (!reducerProgressedEnough || reducerStalled)) { String errorMsg = (srcNameTrimmed + ": " - + "Shuffle failed with too many fetch failures and insufficient progress!" - + "failureCounts=" + failureCounts.size() + + "Shuffle failed with too many fetch failures and insufficient progress: " + + "[failureCounts=" + failureCounts.size() + ", pendingInputs=" + (numInputs - doneMaps) + ", fetcherHealthy=" + fetcherHealthy + ", reducerProgressedEnough=" + reducerProgressedEnough - + ", reducerStalled=" + reducerStalled); + + ", reducerStalled=" + reducerStalled + + ", hostFailures=" + hostFailures) + + "]"; LOG.error(errorMsg); if (LOG.isDebugEnabled()) { LOG.debug("Host failures=" + hostFailures.keySet()); } // Shuffle knows how to deal with failures post shutdown via the onFailure hook - exceptionReporter.reportException(new IOException(errorMsg)); - return false; + throw new IOException(errorMsg, fetchFailure.getCause()); } - return true; } public synchronized void addKnownMapOutput(String inputHostName, @@ -1080,7 +1096,7 @@ public synchronized void addKnownMapOutput(String inputHostName, notifyAll(); } } - + public void obsoleteInput(InputAttemptIdentifier srcAttempt) { // The incoming srcAttempt does not contain a path component. LOG.info(srcNameTrimmed + ": " + "Adding obsolete input: " + srcAttempt); @@ -1093,14 +1109,12 @@ public void obsoleteInput(InputAttemptIdentifier srcAttempt) { if (eventInfo.eventsProcessed.isEmpty() && !eventInfo.scheduledForDownload) { // obsoleted anyways; no point tracking if nothing is started pipelinedShuffleInfoEventsMap.remove(srcAttempt.getInputIdentifier()); - if (LOG.isDebugEnabled()) { - LOG.debug("Removing " + eventInfo + " from tracking"); - } + LOG.debug("Removing {} from tracking", eventInfo); return; } IOException exception = new IOException(srcAttempt + " is marked as obsoleteInput, but it " + "exists in shuffleInfoEventMap. Some data could have been already merged " - + "to memory/disk outputs. Failing the fetch early. eventInfo:" + eventInfo.toString()); + + "to memory/disk outputs. Failing the fetch early. eventInfo: " + eventInfo); String message = "Got obsolete event. 
Killing self as attempt's data could have been consumed"; killSelf(exception, message); return; @@ -1109,7 +1123,7 @@ public void obsoleteInput(InputAttemptIdentifier srcAttempt) { obsoleteInputs.add(srcAttempt); } } - + public synchronized void putBackKnownMapOutput(MapHost host, InputAttemptIdentifier srcAttempt) { host.addKnownMap(srcAttempt); @@ -1117,10 +1131,8 @@ public synchronized void putBackKnownMapOutput(MapHost host, public synchronized MapHost getHost() throws InterruptedException { while (pendingHosts.isEmpty() && remainingMaps.get() > 0) { - if (LOG.isDebugEnabled()) { - LOG.debug("PendingHosts=" + pendingHosts); - } - wait(); + LOG.debug("PendingHosts={}", pendingHosts); + waitAndNotifyProgress(); } if (!pendingHosts.isEmpty()) { @@ -1149,7 +1161,12 @@ public InputAttemptIdentifier getIdentifierForFetchedOutput( String path, int reduceId) { return pathToIdentifierMap.get(new PathPartition(path, reduceId)); } - + + @VisibleForTesting + DelayQueue getPenalties() { + return penalties; + } + private synchronized boolean inputShouldBeConsumed(InputAttemptIdentifier id) { boolean isInputFinished = false; if (id instanceof CompositeInputAttemptIdentifier) { @@ -1158,7 +1175,19 @@ private synchronized boolean inputShouldBeConsumed(InputAttemptIdentifier id) { } else { isInputFinished = isInputFinished(id.getInputIdentifier()); } - return !obsoleteInputs.contains(id) && !isInputFinished; + return !isObsoleteInputAttemptIdentifier(id) && !isInputFinished; + } + + private boolean isObsoleteInputAttemptIdentifier(InputAttemptIdentifier input) { + InputAttemptIdentifier obsoleteInput; + Iterator obsoleteInputsIter = obsoleteInputs.iterator(); + while (obsoleteInputsIter.hasNext()) { + obsoleteInput = obsoleteInputsIter.next(); + if (input.includes(obsoleteInput)) { + return true; + } + } + return false; } public synchronized List getMapsForHost(MapHost host) { @@ -1281,10 +1310,10 @@ public synchronized boolean isDone() { /** * A structure that records the penalty for a host. */ - private static class Penalty implements Delayed { + static class Penalty implements Delayed { MapHost host; private long endTime; - + Penalty(MapHost host, long delay) { this.host = host; this.endTime = System.currentTimeMillis() + delay; @@ -1299,7 +1328,7 @@ public int compareTo(Delayed o) { long other = ((Penalty) o).endTime; return endTime == other ? 0 : (endTime < other ? -1 : 1); } - + } /** @@ -1333,7 +1362,7 @@ public void run() { } } } - + void setInputFinished(int inputIndex) { synchronized(finishedMaps) { @@ -1360,26 +1389,26 @@ private class ShuffleSchedulerCallable extends CallableWithNdc { protected Void callInternal() throws InterruptedException { while (!isShutdown.get() && remainingMaps.get() > 0) { synchronized (ShuffleScheduler.this) { - if (runningFetchers.size() >= numFetchers || pendingHosts.isEmpty()) { - if (remainingMaps.get() > 0) { - try { - ShuffleScheduler.this.wait(); - } catch (InterruptedException e) { - if (isShutdown.get()) { - LOG.info(srcNameTrimmed + ": " + - "Interrupted while waiting for fetchers to complete and hasBeenShutdown. Breaking out of ShuffleSchedulerCallable loop"); - Thread.currentThread().interrupt(); - break; - } else { - throw e; - } + while ((runningFetchers.size() >= numFetchers || pendingHosts.isEmpty()) + && remainingMaps.get() > 0) { + try { + waitAndNotifyProgress(); + } catch (InterruptedException e) { + if (isShutdown.get()) { + LOG.info(srcNameTrimmed + ": " + + "Interrupted while waiting for fetchers to complete " + + "and hasBeenShutdown. 
Breaking out of ShuffleSchedulerCallable loop"); + Thread.currentThread().interrupt(); + break; + } else { + throw e; } } } } if (LOG.isDebugEnabled()) { - LOG.debug(srcNameTrimmed + ": " + "NumCompletedInputs: {}" + (numInputs - remainingMaps.get())); + LOG.debug("{}: NumCompletedInputs: {}", srcNameTrimmed, (numInputs - remainingMaps.get())); } // Ensure there's memory available before scheduling the next Fetcher. @@ -1420,9 +1449,7 @@ protected Void callInternal() throws InterruptedException { if (mapHost == null) { break; // Check for the exit condition. } - if (LOG.isDebugEnabled()) { - LOG.debug(srcNameTrimmed + ": " + "Processing pending host: " + mapHost.toString()); - } + LOG.debug("{}: Processing pending host: {}", srcNameTrimmed, mapHost); if (!isShutdown.get()) { count++; if (LOG.isDebugEnabled()) { @@ -1432,7 +1459,7 @@ protected Void callInternal() throws InterruptedException { FetcherOrderedGrouped fetcherOrderedGrouped = constructFetcherForHost(mapHost); runningFetchers.add(fetcherOrderedGrouped); ListenableFuture future = fetcherExecutor.submit(fetcherOrderedGrouped); - Futures.addCallback(future, new FetchFutureCallback(fetcherOrderedGrouped)); + Futures.addCallback(future, new FetchFutureCallback(fetcherOrderedGrouped), GuavaShim.directExecutor()); } } } @@ -1446,14 +1473,28 @@ protected Void callInternal() throws InterruptedException { } } + private synchronized void waitAndNotifyProgress() throws InterruptedException { + inputContext.notifyProgress(); + wait(1000); + } + @VisibleForTesting FetcherOrderedGrouped constructFetcherForHost(MapHost mapHost) { - return new FetcherOrderedGrouped(httpConnectionParams, ShuffleScheduler.this, allocator, - exceptionReporter, jobTokenSecretManager, ifileReadAhead, ifileReadAheadLength, - codec, conf, localDiskFetchEnabled, localHostname, shufflePort, srcNameTrimmed, mapHost, - ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, - connectionErrsCounter, wrongReduceErrsCounter, applicationId, dagId, asyncHttp, sslShuffle, - verifyDiskChecksum, compositeFetch); + if (enableFetcherTestingErrors) { + return new FetcherOrderedGroupedWithInjectableErrors(httpConnectionParams, ShuffleScheduler.this, allocator, + exceptionReporter, jobTokenSecretManager, ifileReadAhead, ifileReadAheadLength, + codec, conf, localFs, localDiskFetchEnabled, localHostname, shufflePort, mapHost, + ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, + connectionErrsCounter, wrongReduceErrsCounter, asyncHttp, sslShuffle, + verifyDiskChecksum, compositeFetch, inputContext); + } else { + return new FetcherOrderedGrouped(httpConnectionParams, ShuffleScheduler.this, allocator, + exceptionReporter, jobTokenSecretManager, ifileReadAhead, ifileReadAheadLength, + codec, conf, localFs, localDiskFetchEnabled, localHostname, shufflePort, mapHost, + ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, + connectionErrsCounter, wrongReduceErrsCounter, asyncHttp, sslShuffle, + verifyDiskChecksum, compositeFetch, inputContext); + } } private class FetchFutureCallback implements FutureCallback { diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java index b6fe4575de..232d964307 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java +++ 
b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/ExternalSorter.java @@ -21,11 +21,14 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.util.Collections; import java.util.Iterator; +import java.util.List; import java.util.Map; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Maps; +import org.apache.tez.runtime.api.Event; import org.apache.tez.runtime.api.OutputStatisticsReporter; import org.apache.tez.runtime.library.api.IOInterruptedException; import org.slf4j.Logger; @@ -39,9 +42,6 @@ import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.Compressor; -import org.apache.hadoop.io.compress.DefaultCodec; -import org.apache.hadoop.io.serializer.SerializationFactory; import org.apache.hadoop.io.serializer.Serializer; import org.apache.hadoop.util.IndexedSorter; import org.apache.hadoop.util.Progressable; @@ -57,22 +57,24 @@ import org.apache.tez.runtime.library.common.ConfigUtils; import org.apache.tez.runtime.library.common.TezRuntimeUtils; import org.apache.tez.runtime.library.common.combine.Combiner; +import org.apache.tez.runtime.library.common.serializer.SerializationContext; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader; import org.apache.tez.runtime.library.common.sort.impl.IFile.Writer; import org.apache.tez.runtime.library.common.task.local.output.TezTaskOutput; +import org.apache.tez.runtime.library.utils.CodecUtils; +import org.apache.tez.common.Preconditions; -import com.google.common.base.Preconditions; - -@SuppressWarnings({"unchecked", "rawtypes"}) +@SuppressWarnings({"rawtypes"}) public abstract class ExternalSorter { private static final Logger LOG = LoggerFactory.getLogger(ExternalSorter.class); - public void close() throws IOException { + public List close() throws IOException { spillFileIndexPaths.clear(); spillFilePaths.clear(); reportStatistics(); outputContext.notifyProgress(); + return Collections.emptyList(); } public abstract void flush() throws IOException; @@ -98,13 +100,13 @@ public void progress() { protected final Combiner combiner; protected final Partitioner partitioner; protected final Configuration conf; + protected final RawLocalFileSystem localFs; protected final FileSystem rfs; protected final TezTaskOutput mapOutputFile; protected final int partitions; - protected final Class keyClass; - protected final Class valClass; protected final RawComparator comparator; - protected final SerializationFactory serializationFactory; + + protected final SerializationContext serializationContext; protected final Serializer keySerializer; protected final Serializer valSerializer; @@ -129,6 +131,7 @@ public void progress() { protected final boolean cleanup; protected OutputStatisticsReporter statsReporter; + // uncompressed size for each partition protected final long[] partitionStats; protected final boolean finalMergeEnabled; protected final boolean sendEmptyPartitionDetails; @@ -167,6 +170,7 @@ public ExternalSorter(OutputContext outputContext, Configuration conf, int numOu long initialMemoryAvailable) throws IOException { this.outputContext = outputContext; this.conf = conf; + this.localFs = (RawLocalFileSystem) FileSystem.getLocal(conf).getRaw(); this.partitions = numOutputs; reportPartitionStats = ReportPartitionStats.fromString( 
conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS, @@ -180,7 +184,7 @@ public ExternalSorter(OutputContext outputContext, Configuration conf, int numOu rfs = ((LocalFileSystem)FileSystem.getLocal(this.conf)).getRaw(); if (LOG.isDebugEnabled()) { - LOG.debug(outputContext.getDestinationVertexName() + ": Initial Mem bytes : " + + LOG.debug(outputContext.getInputOutputVertexNames() + ": Initial Mem bytes : " + initialMemoryAvailable + ", in MB=" + ((initialMemoryAvailable >> 20))); } int assignedMb = (int) (initialMemoryAvailable >> 20); @@ -195,14 +199,12 @@ public ExternalSorter(OutputContext outputContext, Configuration conf, int numOu comparator = ConfigUtils.getIntermediateOutputKeyComparator(this.conf); // k/v serialization - keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf); - valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf); - serializationFactory = new SerializationFactory(this.conf); - keySerializer = serializationFactory.getSerializer(keyClass); - valSerializer = serializationFactory.getSerializer(valClass); - LOG.info(outputContext.getDestinationVertexName() + " using: " + this.serializationContext = new SerializationContext(this.conf); + keySerializer = serializationContext.getKeySerializer(); + valSerializer = serializationContext.getValueSerializer(); + LOG.info(outputContext.getInputOutputVertexNames() + " using: " + "memoryMb=" + assignedMb - + ", keySerializerClass=" + keyClass + + ", keySerializerClass=" + serializationContext.getKeyClass() + ", valueSerializerClass=" + valSerializer + ", comparator=" + (RawComparator) ConfigUtils.getIntermediateOutputKeyComparator(conf) + ", partitioner=" + conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS) @@ -221,30 +223,7 @@ public ExternalSorter(OutputContext outputContext, Configuration conf, int numOu numShuffleChunks = outputContext.getCounters().findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT); // compression - if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) { - Class codecClass = - ConfigUtils.getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class); - codec = ReflectionUtils.newInstance(codecClass, this.conf); - - if (codec != null) { - Class compressorType = null; - Throwable cause = null; - try { - compressorType = codec.getCompressorType(); - } catch (RuntimeException e) { - cause = e; - } - if (compressorType == null) { - String errMsg = - String.format("Unable to get CompressorType for codec (%s). 
This is most" + - " likely due to missing native libraries for the codec.", - conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC)); - throw new IOException(errMsg, cause); - } - } - } else { - codec = null; - } + this.codec = CodecUtils.getCodec(conf); this.ifileReadAhead = this.conf.getBoolean( TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFile.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFile.java index b502fc95f5..671a426b66 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFile.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFile.java @@ -23,22 +23,25 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; import java.util.Iterator; import java.util.concurrent.atomic.AtomicBoolean; import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.io.BoundedByteArrayOutputStream; +import org.apache.tez.runtime.library.common.task.local.output.TezTaskOutput; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.tez.runtime.library.utils.BufferUtils; +import org.apache.tez.runtime.library.utils.CodecUtils; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.io.compress.CodecPool; @@ -46,7 +49,7 @@ import org.apache.hadoop.io.compress.CompressionOutputStream; import org.apache.hadoop.io.compress.Compressor; import org.apache.hadoop.io.compress.Decompressor; -import org.apache.hadoop.io.serializer.SerializationFactory; +import org.apache.hadoop.io.serializer.Serialization; import org.apache.hadoop.io.serializer.Serializer; import org.apache.tez.common.counters.TezCounter; @@ -59,7 +62,7 @@ */ @InterfaceAudience.Private @InterfaceStability.Unstable -public class IFile { +public final class IFile { private static final Logger LOG = LoggerFactory.getLogger(IFile.class); public static final int EOF_MARKER = -1; // End of File Marker public static final int RLE_MARKER = -2; // Repeat same key marker @@ -71,6 +74,184 @@ public class IFile { private static final String INCOMPLETE_READ = "Requested to read %d got %d"; private static final String REQ_BUFFER_SIZE_TOO_LARGE = "Size of data %d is greater than the max allowed of %d"; + private IFile() {} + + /** + * IFileWriter which stores data in memory for specified limit, beyond + * which it falls back to file based writer. It creates files lazily on + * need basis and avoids any disk hit (in cases, where data fits entirely in mem). + *

+ * This class should not make any changes to IFile logic and should just flip streams + * from mem to disk as needed. + * + * During write, it verifies whether uncompressed payload can fit in memory. If so, it would + * store in buffer. Otherwise, it falls back to file based writer. Note that data stored + * internally would be in compressed format (if codec is provided). However, for easier + * comparison and spill over, uncompressed payload check is done. This is + * done intentionally, as it is not possible to know compressed data length + * upfront. + */ + public static class FileBackedInMemIFileWriter extends Writer { + + private final FileSystem fs; + private boolean bufferFull; + + // For lazy creation of file + private final TezTaskOutput taskOutput; + private int totalSize; + + private Path outputPath; + private final CompressionCodec fileCodec; + private final BoundedByteArrayOutputStream cacheStream; + + private static final int checksumSize = IFileOutputStream.getCheckSumSize(); + + /** + * Note that we do not allow compression in in-mem stream. + * When spilled over to file, compression gets enabled. + */ + public FileBackedInMemIFileWriter(Serialization keySerialization, + Serialization valSerialization, FileSystem fs, TezTaskOutput taskOutput, + Class keyClass, Class valueClass, CompressionCodec codec, TezCounter writesCounter, + TezCounter serializedBytesCounter, int cacheSize) throws IOException { + super(keySerialization, valSerialization, new FSDataOutputStream(createBoundedBuffer(cacheSize), null), + keyClass, valueClass, null, writesCounter, serializedBytesCounter); + this.fs = fs; + this.cacheStream = (BoundedByteArrayOutputStream) this.rawOut.getWrappedStream(); + this.taskOutput = taskOutput; + this.bufferFull = (cacheStream == null); + this.totalSize = getBaseCacheSize(); + this.fileCodec = codec; + } + + /** + * For basic cache size checks: header + checksum + EOF marker + * + * @return size of the base cache needed + */ + static int getBaseCacheSize() { + return (HEADER.length + checksumSize + + (2 * WritableUtils.getVIntSize(EOF_MARKER))); + } + + boolean shouldWriteToDisk() { + return totalSize >= cacheStream.getLimit(); + } + + /** + * Create in-mem stream. If it is too small, adjust its size. + * + * @return in memory stream + */ + public static BoundedByteArrayOutputStream createBoundedBuffer(int size) { + int resize = Math.max(getBaseCacheSize(), size); + return new BoundedByteArrayOutputStream(resize); + } + + /** + * Flip over from memory to file based writer. + * + * 1. Content format: HEADER + real data + CHECKSUM. Checksum is for real + * data. + * 2. Before flipping, close checksum stream, so that checksum is written + * out. + * 3. Create relevant file based writer. + * 4. Write header and then real data. + */ + private void resetToFileBasedWriter() throws IOException { + // Close out stream, so that data checksums are written. + // Buffer contents = HEADER + real data + CHECKSUM + this.out.close(); + + // Get the buffer which contains data in memory + BoundedByteArrayOutputStream bout = + (BoundedByteArrayOutputStream) this.rawOut.getWrappedStream(); + + // Create new file based writer + if (outputPath == null) { + outputPath = taskOutput.getOutputFileForWrite(); + } + LOG.info("Switching from mem stream to disk stream. 
File: " + outputPath); + FSDataOutputStream newRawOut = fs.create(outputPath); + this.rawOut = newRawOut; + this.ownOutputStream = true; + + setupOutputStream(fileCodec); + + // Write header to file + headerWritten = false; + writeHeader(newRawOut); + + // write real data + int sPos = HEADER.length; + int len = (bout.size() - checksumSize - HEADER.length); + this.out.write(bout.getBuffer(), sPos, len); + + bufferFull = true; + bout.reset(); + } + + + @Override + protected void writeKVPair(byte[] keyData, int keyPos, int keyLength, + byte[] valueData, int valPos, int valueLength) throws IOException { + if (!bufferFull) { + // Compute actual payload size: write RLE marker, length info and then entire data. + totalSize += ((prevKey == REPEAT_KEY) ? V_END_MARKER_SIZE : 0) + + WritableUtils.getVIntSize(keyLength) + keyLength + + WritableUtils.getVIntSize(valueLength) + valueLength; + + if (shouldWriteToDisk()) { + resetToFileBasedWriter(); + } + } + super.writeKVPair(keyData, keyPos, keyLength, valueData, valPos, valueLength); + } + + @Override + protected void writeValue(byte[] data, int offset, int length) throws IOException { + if (!bufferFull) { + totalSize += ((prevKey != REPEAT_KEY) ? RLE_MARKER_SIZE : 0) + + WritableUtils.getVIntSize(length) + length; + + if (shouldWriteToDisk()) { + resetToFileBasedWriter(); + } + } + super.writeValue(data, offset, length); + } + + /** + * Check if data was flushed to disk. + * + * @return whether data is flushed to disk ot not + */ + public boolean isDataFlushedToDisk() { + return bufferFull; + } + + /** + * Get cached data if any + * + * @return if data is not flushed to disk, it returns in-mem contents + */ + public ByteBuffer getData() { + if (!isDataFlushedToDisk()) { + return ByteBuffer.wrap(cacheStream.getBuffer(), 0, cacheStream.size()); + } + return null; + } + + @VisibleForTesting + void setOutputPath(Path outputPath) { + this.outputPath = outputPath; + } + + public Path getOutputPath() { + return this.outputPath; + } + } + /** * IFile.Writer to write out intermediate map-outputs. 
*/ @@ -118,44 +299,61 @@ public static class Writer { protected final boolean rle; - public Writer(Configuration conf, FileSystem fs, Path file, + public Writer(Serialization keySerialization, Serialization valSerialization, FileSystem fs, Path file, Class keyClass, Class valueClass, CompressionCodec codec, TezCounter writesCounter, TezCounter serializedBytesCounter) throws IOException { - this(conf, fs.create(file), keyClass, valueClass, codec, + this(keySerialization, valSerialization, fs.create(file), keyClass, valueClass, codec, writesCounter, serializedBytesCounter); ownOutputStream = true; } - - protected Writer(TezCounter writesCounter, TezCounter serializedBytesCounter) { + + protected Writer(TezCounter writesCounter, TezCounter serializedBytesCounter, boolean rle) { writtenRecordsCounter = writesCounter; serializedUncompressedBytes = serializedBytesCounter; - this.rle = false; + this.rle = rle; } - public Writer(Configuration conf, FSDataOutputStream outputStream, + public Writer(Serialization keySerialization, Serialization valSerialization, FSDataOutputStream outputStream, Class keyClass, Class valueClass, CompressionCodec codec, TezCounter writesCounter, TezCounter serializedBytesCounter) throws IOException { - this(conf, outputStream, keyClass, valueClass, codec, writesCounter, + this(keySerialization, valSerialization, outputStream, keyClass, valueClass, codec, writesCounter, serializedBytesCounter, false); } - public Writer(Configuration conf, FSDataOutputStream outputStream, - Class keyClass, Class valueClass, - CompressionCodec codec, TezCounter writesCounter, TezCounter serializedBytesCounter, - boolean rle) throws IOException { + public Writer(Serialization keySerialization, Serialization valSerialization, FSDataOutputStream outputStream, + Class keyClass, Class valueClass, + CompressionCodec codec, TezCounter writesCounter, TezCounter serializedBytesCounter, + boolean rle) throws IOException { this.rawOut = outputStream; this.writtenRecordsCounter = writesCounter; this.serializedUncompressedBytes = serializedBytesCounter; - this.checksumOut = new IFileOutputStream(outputStream); this.start = this.rawOut.getPos(); this.rle = rle; + + setupOutputStream(codec); + + writeHeader(outputStream); + + if (keyClass != null) { + this.closeSerializers = true; + this.keySerializer = keySerialization.getSerializer(keyClass); + this.keySerializer.open(buffer); + this.valueSerializer = valSerialization.getSerializer(valueClass); + this.valueSerializer.open(buffer); + } else { + this.closeSerializers = false; + } + } + + void setupOutputStream(CompressionCodec codec) throws IOException { + this.checksumOut = new IFileOutputStream(this.rawOut); if (codec != null) { - this.compressor = CodecPool.getCompressor(codec); + this.compressor = CodecUtils.getCompressor(codec); if (this.compressor != null) { this.compressor.reset(); - this.compressedOut = codec.createOutputStream(checksumOut, compressor); + this.compressedOut = CodecUtils.createOutputStream(codec, checksumOut, compressor); this.out = new FSDataOutputStream(this.compressedOut, null); this.compressOutput = true; } else { @@ -165,23 +363,10 @@ public Writer(Configuration conf, FSDataOutputStream outputStream, } else { this.out = new FSDataOutputStream(checksumOut,null); } - writeHeader(outputStream); - - if (keyClass != null) { - this.closeSerializers = true; - SerializationFactory serializationFactory = - new SerializationFactory(conf); - this.keySerializer = serializationFactory.getSerializer(keyClass); - 
this.keySerializer.open(buffer); - this.valueSerializer = serializationFactory.getSerializer(valueClass); - this.valueSerializer.open(buffer); - } else { - this.closeSerializers = false; - } } - public Writer(Configuration conf, FileSystem fs, Path file) throws IOException { - this(conf, fs, file, null, null, null, null, null); + public Writer(Serialization keySerialization, Serialization valSerialization, FileSystem fs, Path file) throws IOException { + this(keySerialization, valSerialization, fs, file, null, null, null, null, null); } protected void writeHeader(OutputStream outputStream) throws IOException { @@ -211,7 +396,7 @@ public void close() throws IOException { // Write EOF_MARKER for key/value length WritableUtils.writeVInt(out, EOF_MARKER); WritableUtils.writeVInt(out, EOF_MARKER); - decompressedBytesWritten += 2 * WritableUtils.getVIntSize(EOF_MARKER); + decompressedBytesWritten += 2L * WritableUtils.getVIntSize(EOF_MARKER); //account for header bytes decompressedBytesWritten += HEADER.length; @@ -253,10 +438,6 @@ public void close() throws IOException { * one, send IFile.REPEAT_KEY as key parameter. Should not call this method with * IFile.REPEAT_KEY as the first key. It is caller's responsibility to ensure that correct * key/value type checks and key/value length (non-negative) checks are done properly. - * - * @param key - * @param value - * @throws IOException */ public void append(Object key, Object value) throws IOException { int keyLength = 0; @@ -295,9 +476,6 @@ public void append(Object key, Object value) throws IOException { /** * Appends the value to previous key. Assumes that the caller has already done relevant checks * for identical keys. Also, no validations are done in this method - * - * @param value - * @throws IOException */ public void appendValue(Object value) throws IOException { valueSerializer.serialize(value); @@ -313,9 +491,6 @@ public void appendValue(Object value) throws IOException { * for identical keys. Also, no validations are done in this method. It is caller's responsibility * to pass non-negative key/value lengths. Otherwise,IndexOutOfBoundsException could be * thrown at runtime. - * - * @param value - * @throws IOException */ public void appendValue(DataInputBuffer value) throws IOException { int valueLength = value.getLength() - value.getPosition(); @@ -329,9 +504,6 @@ public void appendValue(DataInputBuffer value) throws IOException { /** * Appends the value to previous key. Assumes that the caller has already done relevant checks * for identical keys. Also, no validations are done in this method - * - * @param valuesItr - * @throws IOException */ public void appendValues(Iterator valuesItr) throws IOException { while(valuesItr.hasNext()) { @@ -341,12 +513,6 @@ public void appendValues(Iterator valuesItr) throws IOException { /** * Append key and its associated set of values. - * - * @param key - * @param valuesItr - * @param - * @param - * @throws IOException */ public void appendKeyValues(K key, Iterator valuesItr) throws IOException { if (valuesItr.hasNext()) { @@ -363,11 +529,6 @@ public void appendKeyValues(K key, Iterator valuesItr) throws IOExcept * one, send IFile.REPEAT_KEY as key parameter. Should not call this method with * IFile.REPEAT_KEY as the first key. It is caller's responsibility to pass non-negative * key/value lengths. Otherwise,IndexOutOfBoundsException could be thrown at runtime. 
- * - * - * @param key - * @param value - * @throws IOException */ public void append(DataInputBuffer key, DataInputBuffer value) throws IOException { int keyLength = key.getLength() - key.getPosition(); @@ -425,7 +586,7 @@ protected void writeKVPair(byte[] keyData, int keyPos, int keyLength, } protected void writeRLE(DataOutputStream out) throws IOException { - /** + /* * To strike a balance between 2 use cases (lots of unique KV in stream * vs lots of identical KV in stream), we start off by writing KV pair. * If subsequent KV is identical, we write RLE marker along with V_END_MARKER @@ -440,7 +601,7 @@ protected void writeRLE(DataOutputStream out) throws IOException { } protected void writeValueMarker(DataOutputStream out) throws IOException { - /** + /* * Write V_END_MARKER only in RLE scenario. This will * save space in conditions where lots of unique KV pairs are found in the * stream. @@ -508,7 +669,7 @@ public enum KeyState {NO_KEY, NEW_KEY, SAME_KEY} protected int recNo = 1; protected int originalKeyLength; protected int prevKeyLength; - byte keyBytes[] = new byte[0]; + private byte[] keyBytes = new byte[0]; protected int currentKeyLength; protected int currentValueLength; @@ -522,7 +683,6 @@ public enum KeyState {NO_KEY, NEW_KEY, SAME_KEY} * checksum bytes for the data at the end of the file. * @param codec codec * @param readsCounter Counter for records read from disk - * @throws IOException */ public Reader(FileSystem fs, Path file, CompressionCodec codec, @@ -541,7 +701,6 @@ public Reader(FileSystem fs, Path file, * bytes. * @param codec codec * @param readsCounter Counter for records read from disk - * @throws IOException */ public Reader(InputStream in, long length, CompressionCodec codec, @@ -550,7 +709,7 @@ public Reader(InputStream in, long length, int bufferSize) throws IOException { this(in, ((in != null) ? (length - HEADER.length) : length), codec, readsCounter, bytesReadCounter, readAhead, readAheadLength, - bufferSize, ((in != null) ? isCompressedFlagEnabled(in) : false)); + bufferSize, (in != null && isCompressedFlagEnabled(in))); if (in != null && bytesReadCounter != null) { bytesReadCounter.increment(IFile.HEADER.length); } @@ -564,7 +723,6 @@ public Reader(InputStream in, long length, * bytes. * @param codec codec * @param readsCounter Counter for records read from disk - * @throws IOException */ public Reader(InputStream in, long length, CompressionCodec codec, @@ -575,9 +733,9 @@ public Reader(InputStream in, long length, checksumIn = new IFileInputStream(in, length, readAhead, readAheadLength/* , isCompressed */); if (isCompressed && codec != null) { - decompressor = CodecPool.getDecompressor(codec); + decompressor = CodecUtils.getDecompressor(codec); if (decompressor != null) { - this.in = codec.createInputStream(checksumIn, decompressor); + this.in = CodecUtils.createInputStream(codec, checksumIn, decompressor); } else { LOG.warn("Could not obtain decompressor from CodecPool"); this.in = checksumIn; @@ -601,14 +759,6 @@ public Reader(InputStream in, long length, /** * Read entire ifile content to memory. 
- * - * @param buffer - * @param in - * @param compressedLength - * @param codec - * @param ifileReadAhead - * @param ifileReadAheadLength - * @throws IOException */ public static void readToMemory(byte[] buffer, InputStream in, int compressedLength, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength) @@ -620,13 +770,13 @@ public static void readToMemory(byte[] buffer, InputStream in, int compressedLen in = checksumIn; Decompressor decompressor = null; if (isCompressed && codec != null) { - decompressor = CodecPool.getDecompressor(codec); + decompressor = CodecUtils.getDecompressor(codec); if (decompressor != null) { decompressor.reset(); - in = codec.createInputStream(checksumIn, decompressor); + in = CodecUtils.getDecompressedInputStreamWithBufferSize(codec, checksumIn, decompressor, + compressedLength); } else { LOG.warn("Could not obtain decompressor from CodecPool"); - in = checksumIn; } } try { @@ -645,9 +795,7 @@ public static void readToMemory(byte[] buffer, InputStream in, int compressedLen try { in.close(); } catch(IOException e) { - if(LOG.isDebugEnabled()) { - LOG.debug("Exception in closing " + in, e); - } + LOG.debug("Exception in closing {}", in, e); } } throw ioe; @@ -666,7 +814,6 @@ public static void readToMemory(byte[] buffer, InputStream in, int compressedLen * @param in the input stream containing the IFile data * @param length the amount of data to read from the input * @return the number of bytes copied - * @throws IOException */ public static long readToDisk(OutputStream out, InputStream in, long length, boolean ifileReadAhead, int ifileReadAheadLength) @@ -711,7 +858,6 @@ public long getPosition() throws IOException { * @param off offset * @param len length of buffer * @return the no. of bytes read - * @throws IOException */ private int readData(byte[] buf, int off, int len) throws IOException { int bytesRead = 0; @@ -752,7 +898,6 @@ protected void readKeyValueLength(DataInput dIn) throws IOException { * @param dIn * @return true if key length and value length were set to the next * false if end of file (EOF) marker was reached - * @throws IOException */ protected boolean positionToNextRecord(DataInput dIn) throws IOException { // Sanity check @@ -890,9 +1035,7 @@ public void close() throws IOException { } } - public void reset(int offset) { - return; - } + public void reset(int offset) {} public void disableChecksumValidation() { checksumIn.disableChecksumValidation(); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFileInputStream.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFileInputStream.java index c5853d4a46..fbdfbc1b93 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFileInputStream.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFileInputStream.java @@ -37,12 +37,12 @@ import org.apache.hadoop.util.DataChecksum; /** * A checksum input stream, used for IFiles. - * Used to validate the checksum of files created by {@link IFileOutputStream}. + * Used to validate the checksum of files created by {@link IFileOutputStream}. */ @InterfaceAudience.Private @InterfaceStability.Unstable public class IFileInputStream extends InputStream { - + private final InputStream in; //The input stream to be verified for checksum. 
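
For context on the IFileInputStream hunks below: the stream carries a payload followed by trailing checksum bytes written by IFileOutputStream, so only length minus the checksum size is user data. A small sketch of that length bookkeeping, assuming the 4-byte CRC32 that the new getCheckSumSize() helper (further below) exposes:

    import org.apache.hadoop.util.DataChecksum;

    // Illustrative length bookkeeping for an IFile stream: the readable
    // payload excludes the trailing CRC32 checksum bytes.
    final class IFileLengthSketch {
      private IFileLengthSketch() {}

      static long dataLength(long totalStreamLength) {
        return totalStreamLength - DataChecksum.Type.CRC32.size; // CRC32 size is 4 bytes
      }
    }
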
private final FileDescriptor inFd; // the file descriptor, if it is known private final long length; //The total length of the input file @@ -137,16 +137,16 @@ public void close() throws IOException { } in.close(); } - + @Override public long skip(long n) throws IOException { throw new IOException("Skip not supported for IFileInputStream"); } - + public long getPosition() { return (currentOffset >= dataLength) ? dataLength : currentOffset; } - + public long getSize() { return checksumSize; } @@ -167,11 +167,11 @@ private void checksum(byte[] b, int off, int len) { System.arraycopy(b, off, buffer, offset, len); offset += len; } - + /** * Read bytes from the stream. * At EOF, checksum is validated, but the checksum - * bytes are not passed back in the buffer. + * bytes are not passed back in the buffer. */ @Override public int read(byte[] b, int off, int len) throws IOException { @@ -207,13 +207,13 @@ public int readWithChecksum(byte[] b, int off, int len) throws IOException { } else if (currentOffset >= dataLength) { // If the previous read drained off all the data, then just return - // the checksum now. Note that checksum validation would have + // the checksum now. Note that checksum validation would have // happened in the earlier read int lenToCopy = (int) (checksumSize - (currentOffset - dataLength)); if (len < lenToCopy) { lenToCopy = len; } - System.arraycopy(csum, (int) (currentOffset - dataLength), b, off, + System.arraycopy(csum, (int) (currentOffset - dataLength), b, off, lenToCopy); currentOffset += lenToCopy; return lenToCopy; @@ -232,21 +232,21 @@ else if (currentOffset >= dataLength) { } private int doRead(byte[]b, int off, int len) throws IOException { - + // If we are trying to read past the end of data, just read - // the left over data + // the leftover data int origLen = len; if (currentOffset + len > dataLength) { len = (int) (dataLength - currentOffset); } - + int bytesRead = in.read(b, off, len); if (bytesRead < 0) { String mesg = " CurrentOffset=" + currentOffset + ", offset=" + offset + ", off=" + off + - ", dataLength=" + dataLength + + ", dataLength=" + dataLength + ", origLen=" + origLen + ", len=" + len + ", length=" + length + @@ -262,7 +262,7 @@ private int doRead(byte[]b, int off, int len) throws IOException { if (disableChecksumValidation) { return bytesRead; } - + if (currentOffset == dataLength) { //TODO: add checksumSize to currentOffset. // The last four bytes are checksum. 
Strip them and verify @@ -272,13 +272,13 @@ private int doRead(byte[]b, int off, int len) throws IOException { if (!sum.compare(csum, 0)) { String mesg = "CurrentOffset=" + currentOffset + ", off=" + offset + - ", dataLength=" + dataLength + + ", dataLength=" + dataLength + ", origLen=" + origLen + ", len=" + len + ", length=" + length + ", checksumSize=" + checksumSize+ ", csum=" + Arrays.toString(csum) + - ", sum=" + sum; + ", sum=" + sum; LOG.info(mesg); throw new ChecksumException("Checksum Error: " + mesg, 0); @@ -289,11 +289,11 @@ private int doRead(byte[]b, int off, int len) throws IOException { @Override - public int read() throws IOException { + public int read() throws IOException { b[0] = 0; int l = read(b,0,1); if (l < 0) return l; - + // Upgrade the b[0] to an int so as not to misinterpret the // first bit of the byte as a sign bit int result = 0xFF & b[0]; diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFileOutputStream.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFileOutputStream.java index 319844680d..5ec0537288 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFileOutputStream.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/IFileOutputStream.java @@ -59,6 +59,10 @@ public IFileOutputStream(OutputStream out) { offset = 0; } + public static int getCheckSumSize() { + return DataChecksum.Type.CRC32.size; + } + @Override public void close() throws IOException { if (closed) { diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java index 88d10d0f11..d0eeadca06 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java @@ -34,10 +34,11 @@ import java.util.zip.Deflater; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.runtime.library.api.IOInterruptedException; import org.slf4j.Logger; @@ -68,6 +69,8 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; +import static org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord.ensureSpillFilePermissions; + @SuppressWarnings({"unchecked", "rawtypes"}) public class PipelinedSorter extends ExternalSorter { @@ -113,6 +116,8 @@ public class PipelinedSorter extends ExternalSorter { //Maintain a list of ByteBuffers @VisibleForTesting final List buffers; + @VisibleForTesting + List bufferUsage; final int maxNumberOfBlocks; private int bufferIndex = -1; private final int MIN_BLOCK_SIZE; @@ -120,6 +125,11 @@ public class PipelinedSorter extends ExternalSorter { private final Deflater deflater; private final String auxiliaryService; + /** + * Store the events to be sent in close(). + */ + private final List finalEvents; + // TODO Set additional counters - total bytes written, spills etc. 
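
The ensureSpillFilePermissions() calls paired with rfs.create() in the spill paths below tighten the mode of freshly created spill files. A minimal sketch of such a helper, under the assumption of a 0640 mode (the real implementation lives in TezSpillRecord, per the static import above):

    import java.io.IOException;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsPermission;

    // Illustrative helper: restrict a newly created spill file to rw-r-----
    // so only the task user (and group, e.g. the shuffle service) can read it.
    final class SpillPermissionSketch {
      private SpillPermissionSketch() {}

      static void ensureSpillFilePermissions(Path file, FileSystem fs) throws IOException {
        fs.setPermission(file, new FsPermission((short) 0640));
      }
    }
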
public PipelinedSorter(OutputContext outputContext, Configuration conf, int numOutputs, @@ -151,7 +161,7 @@ public PipelinedSorter(OutputContext outputContext, Configuration conf, int numO } StringBuilder initialSetupLogLine = new StringBuilder("Setting up PipelinedSorter for ") - .append(outputContext.getDestinationVertexName()).append(": "); + .append(outputContext.getInputOutputVertexNames()).append(": "); partitionBits = bitcount(partitions)+1; boolean confPipelinedShuffle = this.conf.getBoolean(TezRuntimeConfiguration @@ -194,6 +204,7 @@ public PipelinedSorter(OutputContext outputContext, Configuration conf, int numO capacity = totalCapacityWithoutMeta; buffers = Lists.newArrayListWithCapacity(maxNumberOfBlocks); + bufferUsage = Lists.newArrayListWithCapacity(maxNumberOfBlocks); allocateSpace(); //Allocate the first block if (!lazyAllocateMem) { LOG.info("Pre allocating rest of memory buffers upfront"); @@ -224,15 +235,15 @@ public PipelinedSorter(OutputContext outputContext, Configuration conf, int numO TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_SORT_THREADS_DEFAULT); sortmaster = Executors.newFixedThreadPool(sortThreads, new ThreadFactoryBuilder().setDaemon(true) - .setNameFormat("Sorter {" + TezUtilsInternal - .cleanVertexName(outputContext.getDestinationVertexName()) + "} #%d") - .build()); - + .setNameFormat("Sorter {" + TezUtilsInternal.cleanVertexName(outputContext.getTaskVertexName()) + " -> " + + TezUtilsInternal.cleanVertexName(outputContext.getDestinationVertexName()) + "} #%d") + .build()); valSerializer.open(span.out); keySerializer.open(span.out); minSpillsForCombine = this.conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINE_MIN_SPILLS, 3); deflater = TezCommonUtils.newBestCompressionDeflater(); + finalEvents = Lists.newLinkedList(); } ByteBuffer allocateSpace() { @@ -248,6 +259,7 @@ ByteBuffer allocateSpace() { buffers.add(space); bufferIndex++; + bufferUsage.add(0); Preconditions.checkState(buffers.size() <= maxNumberOfBlocks, "Number of blocks " + buffers.size() @@ -323,13 +335,15 @@ public void sort() throws IOException { boolean ret = spill(true); stopWatch.stop(); if (LOG.isDebugEnabled()) { - LOG.debug(outputContext.getDestinationVertexName() + ": Time taken for spill " + (stopWatch.now(TimeUnit.MILLISECONDS)) + " ms"); + LOG.debug(outputContext.getInputOutputVertexNames() + ": Time taken for spill " + + (stopWatch.now(TimeUnit.MILLISECONDS)) + " ms"); } if (pipelinedShuffle && ret) { sendPipelinedShuffleEvents(); } - //safe to reset bufferIndex to 0; - bufferIndex = 0; + // Use the next buffer + bufferIndex = (bufferIndex + 1) % buffers.size(); + bufferUsage.set(bufferIndex, bufferUsage.get(bufferIndex) + 1); int items = 1024*1024; int perItem = 16; if(span.length() != 0) { @@ -366,7 +380,7 @@ private void sendPipelinedShuffleEvents() throws IOException{ partitions, sendEmptyPartitionDetails, pathComponent, partitionStats, reportDetailedPartitionStats(), auxiliaryService, deflater); outputContext.sendEvents(events); - LOG.info(outputContext.getDestinationVertexName() + + LOG.info(outputContext.getInputOutputVertexNames() + ": Added spill event for spill (final update=false), spillId=" + (numSpills - 1)); } @@ -384,14 +398,14 @@ public void write(Object key, Object value) */ synchronized void collect(Object key, Object value, final int partition ) throws IOException { - if (key.getClass() != keyClass) { + if (key.getClass() != serializationContext.getKeyClass()) { throw new IOException("Type mismatch in key from map: expected " - + keyClass.getName() 
+ ", received " + + serializationContext.getKeyClass().getName() + ", received " + key.getClass().getName()); } - if (value.getClass() != valClass) { + if (value.getClass() != serializationContext.getValueClass()) { throw new IOException("Type mismatch in value from map: expected " - + valClass.getName() + ", received " + + serializationContext.getValueClass().getName() + ", received " + value.getClass().getName()); } if (partition < 0 || partition >= partitions) { @@ -479,9 +493,10 @@ private void spillSingleRecord(final Object key, final Object value, * MAP_OUTPUT_INDEX_RECORD_LENGTH); spillFilePaths.put(numSpills, filename); FSDataOutputStream out = rfs.create(filename, true, 4096); + ensureSpillFilePermissions(filename, rfs); try { - LOG.info(outputContext.getDestinationVertexName() + ": Spilling to " + filename.toString() + + LOG.info(outputContext.getInputOutputVertexNames() + ": Spilling to " + filename.toString() + ", indexFilename=" + indexFilename); for (int i = 0; i < partitions; ++i) { if (isThreadInterrupted()) { @@ -491,8 +506,9 @@ private void spillSingleRecord(final Object key, final Object value, try { long segmentStart = out.getPos(); if (!sendEmptyPartitionDetails || (i == partition)) { - writer = new Writer(conf, out, keyClass, valClass, codec, - spilledRecordsCounter, null, false); + writer = new Writer(serializationContext.getKeySerialization(), + serializationContext.getValSerialization(), out, serializationContext.getKeyClass(), + serializationContext.getValueClass(), codec, spilledRecordsCounter, null, false); } // we need not check for combiner since its a single record if (i == partition) { @@ -523,7 +539,7 @@ private void spillSingleRecord(final Object key, final Object value, } spillFileIndexPaths.put(numSpills, indexFilename); - spillRec.writeToFile(indexFilename, conf); + spillRec.writeToFile(indexFilename, conf, localFs); //TODO: honor cache limits indexCacheList.add(spillRec); ++numSpills; @@ -552,8 +568,9 @@ public boolean spill(boolean ignoreEmptySpills) throws IOException { } } catch (InterruptedException e) { Thread.currentThread().interrupt(); - LOG.info(outputContext.getDestinationVertexName() + ": Interrupted while waiting for mergers to complete"); - throw new IOInterruptedException(outputContext.getDestinationVertexName() + ": Interrupted while waiting for mergers to complete", e); + LOG.info(outputContext.getInputOutputVertexNames() + ": Interrupted while waiting for mergers to complete"); + throw new IOInterruptedException( + outputContext.getInputOutputVertexNames() + ": Interrupted while waiting for mergers to complete", e); } // create spill file @@ -564,7 +581,8 @@ public boolean spill(boolean ignoreEmptySpills) throws IOException { mapOutputFile.getSpillFileForWrite(numSpills, size); spillFilePaths.put(numSpills, filename); out = rfs.create(filename, true, 4096); - LOG.info(outputContext.getDestinationVertexName() + ": Spilling to " + filename.toString()); + ensureSpillFilePermissions(filename, rfs); + LOG.info(outputContext.getInputOutputVertexNames() + ": Spilling to " + filename.toString()); for (int i = 0; i < partitions; ++i) { if (isThreadInterrupted()) { return false; @@ -574,15 +592,16 @@ public boolean spill(boolean ignoreEmptySpills) throws IOException { //write merged output to disk long segmentStart = out.getPos(); Writer writer = null; - boolean hasNext = kvIter.next(); + boolean hasNext = kvIter.hasNext(); if (hasNext || !sendEmptyPartitionDetails) { - writer = new Writer(conf, out, keyClass, valClass, codec, - 
spilledRecordsCounter, null, merger.needsRLE()); + writer = new Writer(serializationContext.getKeySerialization(), + serializationContext.getValSerialization(), out, serializationContext.getKeyClass(), + serializationContext.getValueClass(), codec, spilledRecordsCounter, null, + merger.needsRLE()); } if (combiner == null) { - while (hasNext) { + while (kvIter.next()) { writer.append(kvIter.getKey(), kvIter.getValue()); - hasNext = kvIter.next(); } } else { if (hasNext) { @@ -603,7 +622,7 @@ public boolean spill(boolean ignoreEmptySpills) throws IOException { new TezIndexRecord(segmentStart, rawLength, partLength); spillRec.putIndex(rec, i); if (!isFinalMergeEnabled() && reportPartitionStats()) { - partitionStats[i] += partLength; + partitionStats[i] += rawLength; } } @@ -611,7 +630,7 @@ public boolean spill(boolean ignoreEmptySpills) throws IOException { mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH); spillFileIndexPaths.put(numSpills, indexFilename); - spillRec.writeToFile(indexFilename, conf); + spillRec.writeToFile(indexFilename, conf, localFs); //TODO: honor cache limits indexCacheList.add(spillRec); ++numSpills; @@ -634,8 +653,9 @@ private boolean isThreadInterrupted() throws IOException { cleanup(); } sortmaster.shutdownNow(); - LOG.info(outputContext.getDestinationVertexName() + ": Thread interrupted, cleaned up stale data, sorter threads shutdown=" + sortmaster - .isShutdown() + ", terminated=" + sortmaster.isTerminated()); + LOG.info(outputContext.getInputOutputVertexNames() + + ": Thread interrupted, cleaned up stale data, sorter threads shutdown=" + sortmaster.isShutdown() + + ", terminated=" + sortmaster.isTerminated()); return true; } return false; @@ -656,7 +676,7 @@ public void flush() throws IOException { } try { - LOG.info(outputContext.getDestinationVertexName() + ": Starting flush of map output"); + LOG.info(outputContext.getInputOutputVertexNames() + ": Starting flush of map output"); span.end(); merger.add(span.sort(sorter)); // force a spill in flush() @@ -680,15 +700,13 @@ public void flush() throws IOException { * NPE leading to distraction when debugging. */ if (LOG.isDebugEnabled()) { - LOG.debug(outputContext.getDestinationVertexName() + LOG.debug(outputContext.getInputOutputVertexNames() + ": Index list is empty... returning"); } return; } if (!isFinalMergeEnabled()) { - //Generate events for all spills - List events = Lists.newLinkedList(); //For pipelined shuffle, previous events are already sent. Just generate the last event alone int startIndex = (pipelinedShuffle) ? 
(numSpills - 1) : 0; @@ -697,13 +715,13 @@ public void flush() throws IOException { for (int i = startIndex; i < endIndex; i++) { boolean isLastEvent = (i == numSpills - 1); String pathComponent = (outputContext.getUniqueIdentifier() + "_" + i); - ShuffleUtils.generateEventOnSpill(events, isFinalMergeEnabled(), isLastEvent, + ShuffleUtils.generateEventOnSpill(finalEvents, isFinalMergeEnabled(), isLastEvent, outputContext, i, indexCacheList.get(i), partitions, sendEmptyPartitionDetails, pathComponent, partitionStats, reportDetailedPartitionStats(), auxiliaryService, deflater); - LOG.info(outputContext.getDestinationVertexName() + ": Adding spill event for spill (final update=" + isLastEvent + "), spillId=" + i); + LOG.info(outputContext.getInputOutputVertexNames() + ": Adding spill event for spill (final update=" + + isLastEvent + "), spillId=" + i); } - outputContext.sendEvents(events); return; } @@ -721,15 +739,15 @@ public void flush() throws IOException { sameVolRename(filename, finalOutputFile); sameVolRename(indexFilename, finalIndexFile); if (LOG.isDebugEnabled()) { - LOG.debug(outputContext.getDestinationVertexName() + ": numSpills=" + numSpills + + LOG.debug(outputContext.getInputOutputVertexNames() + ": numSpills=" + numSpills + ", finalOutputFile=" + finalOutputFile + ", " + "finalIndexFile=" + finalIndexFile + ", filename=" + filename + ", indexFilename=" + indexFilename); } - TezSpillRecord spillRecord = new TezSpillRecord(finalIndexFile, conf); + TezSpillRecord spillRecord = new TezSpillRecord(finalIndexFile, localFs); if (reportPartitionStats()) { for (int i = 0; i < spillRecord.size(); i++) { - partitionStats[i] += spillRecord.getIndex(i).getPartLength(); + partitionStats[i] += spillRecord.getIndex(i).getRawLength(); } } numShuffleChunks.setValue(numSpills); @@ -744,12 +762,13 @@ public void flush() throws IOException { mapOutputFile.getOutputIndexFileForWrite(0); //TODO if (LOG.isDebugEnabled()) { - LOG.debug(outputContext.getDestinationVertexName() + ": " + + LOG.debug(outputContext.getInputOutputVertexNames() + ": " + "numSpills: " + numSpills + ", finalOutputFile:" + finalOutputFile + ", finalIndexFile:" + finalIndexFile); } //The output stream for the final single output file FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096); + ensureSpillFilePermissions(finalOutputFile, rfs); final TezSpillRecord spillRec = new TezSpillRecord(partitions); @@ -778,7 +797,7 @@ public void flush() throws IOException { boolean sortSegments = segmentList.size() > mergeFactor; //merge TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs, - keyClass, valClass, codec, + serializationContext, codec, segmentList, mergeFactor, new Path(uniqueIdentifier), (RawComparator) ConfigUtils.getIntermediateOutputKeyComparator(conf), @@ -790,9 +809,10 @@ public void flush() throws IOException { long rawLength = 0; long partLength = 0; if (shouldWrite) { - Writer writer = - new Writer(conf, finalOut, keyClass, valClass, codec, - spilledRecordsCounter, null, merger.needsRLE()); + Writer writer = new Writer(serializationContext.getKeySerialization(), + serializationContext.getValSerialization(), finalOut, + serializationContext.getKeyClass(), serializationContext.getValueClass(), codec, + spilledRecordsCounter, null, merger.needsRLE()); if (combiner == null || numSpills < minSpillsForCombine) { TezMerger.writeFile(kvIter, writer, progressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT); @@ -812,14 +832,14 @@ public void flush() throws IOException { new 
TezIndexRecord(segmentStart, rawLength, partLength); spillRec.putIndex(rec, parts); if (reportPartitionStats()) { - partitionStats[parts] += partLength; + partitionStats[parts] += rawLength; } } numShuffleChunks.setValue(1); //final merge has happened. fileOutputByteCounter.increment(rfs.getFileStatus(finalOutputFile).getLen()); - spillRec.writeToFile(finalIndexFile, conf); + spillRec.writeToFile(finalIndexFile, conf, localFs); finalOut.close(); for (int i = 0; i < numSpills; i++) { Path indexFilename = spillFileIndexPaths.get(i); @@ -839,9 +859,20 @@ public void flush() throws IOException { } } + /** + * Close and send events. + * @return events to be returned by the edge. + * @throws IOException parent can throw this. + */ + public final List close() throws IOException { + super.close(); + return finalEvents; + } + private interface PartitionedRawKeyValueIterator extends TezRawKeyValueIterator { int getPartition(); + Integer peekPartition(); } private static class BufferStreamWrapper extends OutputStream @@ -909,14 +940,14 @@ private final class SortSpan implements IndexedSortable { public SortSpan(ByteBuffer source, int maxItems, int perItem, RawComparator comparator) { capacity = source.remaining(); int metasize = METASIZE*maxItems; - int dataSize = maxItems * perItem; + long dataSize = (long) maxItems * (long) perItem; if(capacity < (metasize+dataSize)) { // try to allocate less meta space, because we have sample data metasize = METASIZE*(capacity/(perItem+METASIZE)); } ByteBuffer reserved = source.duplicate(); reserved.mark(); - LOG.info(outputContext.getDestinationVertexName() + ": " + "reserved.remaining()=" + + LOG.info(outputContext.getInputOutputVertexNames() + ": " + "reserved.remaining()=" + reserved.remaining() + ", reserved.metasize=" + metasize); reserved.position(metasize); kvbuffer = reserved.slice(); @@ -938,8 +969,8 @@ public SpanIterator sort(IndexedSorter sorter) { if(length() > 1) { sorter.sort(this, 0, length(), progressable); } - LOG.info(outputContext.getDestinationVertexName() + ": " + "done sorting span=" + index + ", length=" + length() + ", " - + "time=" + (System.currentTimeMillis() - start)); + LOG.info(outputContext.getInputOutputVertexNames() + ": " + "done sorting span=" + index + ", length=" + length() + + ", " + "time=" + (System.currentTimeMillis() - start)); return new SpanIterator((SortSpan)this); } @@ -1014,8 +1045,9 @@ public SortSpan next() { } newSpan = new SortSpan(remaining, items, perItem, newComparator); newSpan.index = index+1; - LOG.info(String.format(outputContext.getDestinationVertexName() + ": " + "New Span%d.length = %d, perItem = %d", newSpan.index, newSpan - .length(), perItem) + ", counter:" + mapOutputRecordCounter.getValue()); + LOG.info( + String.format(outputContext.getInputOutputVertexNames() + ": " + "New Span%d.length = %d, perItem = %d", + newSpan.index, newSpan.length(), perItem) + ", counter:" + mapOutputRecordCounter.getValue()); return newSpan; } return null; @@ -1036,13 +1068,14 @@ public ByteBuffer end() { return null; } int perItem = kvbuffer.position()/items; - LOG.info(outputContext.getDestinationVertexName() + ": " + String.format("Span%d.length = %d, perItem = %d", index, length(), perItem)); + LOG.info(outputContext.getInputOutputVertexNames() + ": " + + String.format("Span%d.length = %d, perItem = %d", index, length(), perItem)); if(remaining.remaining() < METASIZE+perItem) { //Check if we can get the next Buffer from the main buffer list ByteBuffer space = allocateSpace(); if (space != null) { - 
LOG.info(outputContext.getDestinationVertexName() + ": " + "Getting memory from next block in the list, recordsWritten=" + - mapOutputRecordCounter.getValue()); + LOG.info(outputContext.getInputOutputVertexNames() + ": " + + "Getting memory from next block in the list, recordsWritten=" + mapOutputRecordCounter.getValue()); reinit = true; return space; } @@ -1122,13 +1155,18 @@ public DataInputBuffer getValue() { public boolean next() { // caveat: since we use this as a comparable in the merger if(kvindex == maxindex) return false; + kvindex += 1; if(kvindex % 100 == 0) { - progress.set((kvindex-maxindex) / (float)maxindex); + progress.set(1 - ((maxindex - kvindex) / (float) maxindex)); } - kvindex += 1; return true; } + @Override + public boolean hasNext() { + return (kvindex == maxindex); + } + public void close() { } @@ -1146,6 +1184,14 @@ public int getPartition() { return partition; } + public Integer peekPartition() { + if (!hasNext()) { + return null; + } else { + return kvmeta.get(span.offsetFor(kvindex + 1) + PARTITION); + } + } + @SuppressWarnings("unused") public int size() { return (maxindex - kvindex); @@ -1154,7 +1200,7 @@ public int size() { public int compareTo(SpanIterator other) { return span.compareInternal(other.getKey(), other.getPartition(), kvindex); } - + @Override public String toString() { return String.format("SpanIterator<%d:%d> (span=%s)", kvindex, maxindex, span.toString()); @@ -1264,6 +1310,23 @@ public boolean next() throws IOException { return false; } + @Override + public boolean hasNext() throws IOException { + if (dirty || iter.hasNext()) { + Integer part; + if (dirty) { + part = iter.getPartition(); + } else { + part = iter.peekPartition(); + } + + if (part != null) { + return (part >>> (32 - partitionBits)) == partition; + } + } + return false; + } + public void reset(int partition) { this.partition = partition; } @@ -1345,7 +1408,7 @@ public final boolean ready() throws IOException, InterruptedException { total += sp.span.length(); eq += sp.span.getEq(); } - LOG.info(outputContext.getDestinationVertexName() + ": " + "Heap = " + sb.toString()); + LOG.info(outputContext.getInputOutputVertexNames() + ": " + "Heap = " + sb.toString()); return true; } catch(ExecutionException e) { LOG.error("Heap size={}, total={}, eq={}, partition={}, gallop={}, totalItr={}," @@ -1403,6 +1466,20 @@ public final boolean next() { return false; } + @Override + public boolean hasNext() { + return peek() != null; + } + + public Integer peekPartition() { + if (!hasNext()) { + return null; + } else { + SpanIterator peek = peek(); + return peek.getPartition(); + } + } + public DataInputBuffer getKey() { return key; } public DataInputBuffer getValue() { return value; } public int getPartition() { return partition; } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezMerger.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezMerger.java index 6eb9a40c93..061ba18384 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezMerger.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezMerger.java @@ -47,6 +47,7 @@ import org.apache.tez.common.counters.TezCounter; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.common.Constants; +import org.apache.tez.runtime.library.common.serializer.SerializationContext; import 
org.apache.tez.runtime.library.common.sort.impl.IFile.Reader; import org.apache.tez.runtime.library.common.sort.impl.IFile.Reader.KeyState; import org.apache.tez.runtime.library.common.sort.impl.IFile.Writer; @@ -60,166 +61,180 @@ @InterfaceAudience.Private @InterfaceStability.Unstable @SuppressWarnings({"unchecked", "rawtypes"}) -public class TezMerger { +public final class TezMerger { private static final Logger LOG = LoggerFactory.getLogger(TezMerger.class); - + // Local directories - private static LocalDirAllocator lDirAlloc = + private static final LocalDirAllocator L_DIR_ALLOC = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); - public static - TezRawKeyValueIterator merge(Configuration conf, FileSystem fs, - Class keyClass, Class valueClass, - CompressionCodec codec, boolean ifileReadAhead, - int ifileReadAheadLength, int ifileBufferSize, - Path[] inputs, boolean deleteInputs, - int mergeFactor, Path tmpDir, - RawComparator comparator, Progressable reporter, - TezCounter readsCounter, - TezCounter writesCounter, - TezCounter bytesReadCounter, - Progress mergePhase) - throws IOException, InterruptedException { - return - new MergeQueue(conf, fs, inputs, deleteInputs, codec, ifileReadAhead, - ifileReadAheadLength, ifileBufferSize, false, comparator, - reporter, null).merge(keyClass, valueClass, - mergeFactor, tmpDir, - readsCounter, writesCounter, - bytesReadCounter, - mergePhase); + private TezMerger() {} + + public static TezRawKeyValueIterator merge(Configuration conf, FileSystem fs, + SerializationContext serializationContext, + CompressionCodec codec, boolean ifileReadAhead, + int ifileReadAheadLength, int ifileBufferSize, + Path[] inputs, boolean deleteInputs, + int mergeFactor, Path tmpDir, + RawComparator comparator, Progressable reporter, + TezCounter readsCounter, + TezCounter writesCounter, + TezCounter bytesReadCounter, + Progress mergePhase) + throws IOException, InterruptedException { + return + new MergeQueue(conf, fs, inputs, deleteInputs, codec, ifileReadAhead, + ifileReadAheadLength, ifileBufferSize, false, comparator, + reporter, null).merge(serializationContext, + mergeFactor, tmpDir, + readsCounter, writesCounter, + bytesReadCounter, + mergePhase); } // Used by the in-memory merger. - public static - TezRawKeyValueIterator merge(Configuration conf, FileSystem fs, - Class keyClass, Class valueClass, - List segments, - int mergeFactor, Path tmpDir, - RawComparator comparator, Progressable reporter, - TezCounter readsCounter, - TezCounter writesCounter, - TezCounter bytesReadCounter, - Progress mergePhase) - throws IOException, InterruptedException { + public static TezRawKeyValueIterator merge(Configuration conf, FileSystem fs, + SerializationContext serializationContext, + List segments, + int mergeFactor, Path tmpDir, + RawComparator comparator, Progressable reporter, + TezCounter readsCounter, + TezCounter writesCounter, + TezCounter bytesReadCounter, + Progress mergePhase) + throws IOException, InterruptedException { // Get rid of this ? 
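// ---------------------------------------------------------------------------
// Editor's sketch (illustrative, not part of the patch): every TezMerger.merge()
// overload above now accepts a single SerializationContext in place of the old
// raw Class keyClass / Class valueClass pair. Only the accessors that appear
// elsewhere in this patch are assumed below; the SerializationContext
// constructor is not shown in this diff, so callers are expected to obtain the
// context from the sorter/shuffle setup rather than build it inline.
//
//   Writer writer = new Writer(serializationContext.getKeySerialization(),
//       serializationContext.getValSerialization(), fs, outputFile,
//       serializationContext.getKeyClass(), serializationContext.getValueClass(),
//       codec, writesCounter, null);
// ---------------------------------------------------------------------------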
- return merge(conf, fs, keyClass, valueClass, segments, mergeFactor, tmpDir, - comparator, reporter, false, readsCounter, writesCounter, bytesReadCounter, - mergePhase); + return merge(conf, fs, serializationContext, segments, mergeFactor, tmpDir, + comparator, reporter, false, readsCounter, writesCounter, bytesReadCounter, + mergePhase); } - public static + public static TezRawKeyValueIterator merge(Configuration conf, FileSystem fs, - Class keyClass, Class valueClass, - List segments, - int mergeFactor, Path tmpDir, - RawComparator comparator, Progressable reporter, - boolean sortSegments, - TezCounter readsCounter, - TezCounter writesCounter, - TezCounter bytesReadCounter, - Progress mergePhase) - throws IOException, InterruptedException { + SerializationContext serializationContext, + List segments, + int mergeFactor, Path tmpDir, + RawComparator comparator, Progressable reporter, + boolean sortSegments, + TezCounter readsCounter, + TezCounter writesCounter, + TezCounter bytesReadCounter, + Progress mergePhase) + throws IOException, InterruptedException { return new MergeQueue(conf, fs, segments, comparator, reporter, - sortSegments, false).merge(keyClass, valueClass, - mergeFactor, tmpDir, - readsCounter, writesCounter, - bytesReadCounter, mergePhase); + sortSegments, false).merge(serializationContext, mergeFactor, tmpDir, + readsCounter, writesCounter, + bytesReadCounter, mergePhase); } - public static - TezRawKeyValueIterator merge(Configuration conf, FileSystem fs, - Class keyClass, Class valueClass, + public static TezRawKeyValueIterator merge(Configuration conf, FileSystem fs, + SerializationContext serializationContext, CompressionCodec codec, List segments, int mergeFactor, Path tmpDir, RawComparator comparator, Progressable reporter, - boolean sortSegments, - boolean considerFinalMergeForProgress, TezCounter readsCounter, TezCounter writesCounter, TezCounter bytesReadCounter, - Progress mergePhase, boolean checkForSameKeys) - throws IOException, InterruptedException { + Progress mergePhase) throws IOException, InterruptedException { return new MergeQueue(conf, fs, segments, comparator, reporter, - sortSegments, codec, considerFinalMergeForProgress, checkForSameKeys). - merge(keyClass, valueClass, - mergeFactor, tmpDir, - readsCounter, writesCounter, - bytesReadCounter, - mergePhase); + false, codec, false, false) + .merge(serializationContext, mergeFactor, tmpDir, + readsCounter, writesCounter, bytesReadCounter, mergePhase); } - public static + public static TezRawKeyValueIterator merge(Configuration conf, FileSystem fs, - Class keyClass, Class valueClass, - CompressionCodec codec, - List segments, - int mergeFactor, Path tmpDir, - RawComparator comparator, Progressable reporter, - boolean sortSegments, - boolean considerFinalMergeForProgress, - TezCounter readsCounter, - TezCounter writesCounter, - TezCounter bytesReadCounter, - Progress mergePhase) - throws IOException, InterruptedException { + SerializationContext serializationContext, + CompressionCodec codec, + List segments, + int mergeFactor, Path tmpDir, + RawComparator comparator, Progressable reporter, + boolean sortSegments, + boolean considerFinalMergeForProgress, + TezCounter readsCounter, + TezCounter writesCounter, + TezCounter bytesReadCounter, + Progress mergePhase, boolean checkForSameKeys) + throws IOException, InterruptedException { return new MergeQueue(conf, fs, segments, comparator, reporter, - sortSegments, codec, considerFinalMergeForProgress). 
- merge(keyClass, valueClass, - mergeFactor, tmpDir, - readsCounter, writesCounter, - bytesReadCounter, - mergePhase); + sortSegments, codec, considerFinalMergeForProgress, checkForSameKeys). + merge(serializationContext, + mergeFactor, tmpDir, + readsCounter, writesCounter, + bytesReadCounter, + mergePhase); } - public static + public static TezRawKeyValueIterator merge(Configuration conf, FileSystem fs, - Class keyClass, Class valueClass, - CompressionCodec codec, - List segments, - int mergeFactor, int inMemSegments, Path tmpDir, - RawComparator comparator, Progressable reporter, - boolean sortSegments, - TezCounter readsCounter, - TezCounter writesCounter, - TezCounter bytesReadCounter, - Progress mergePhase) - throws IOException, InterruptedException { - return new MergeQueue(conf, fs, segments, comparator, reporter, - sortSegments, codec, false).merge(keyClass, valueClass, - mergeFactor, inMemSegments, - tmpDir, - readsCounter, writesCounter, - bytesReadCounter, - mergePhase); -} + SerializationContext serializationContext, + CompressionCodec codec, + List segments, + int mergeFactor, Path tmpDir, + RawComparator comparator, Progressable reporter, + boolean sortSegments, + boolean considerFinalMergeForProgress, + TezCounter readsCounter, + TezCounter writesCounter, + TezCounter bytesReadCounter, + Progress mergePhase) + throws IOException, InterruptedException { + return new MergeQueue(conf, fs, segments, comparator, reporter, + sortSegments, codec, considerFinalMergeForProgress). + merge(serializationContext, mergeFactor, tmpDir, + readsCounter, writesCounter, + bytesReadCounter, + mergePhase); + } - public static + public static + TezRawKeyValueIterator merge(Configuration conf, FileSystem fs, + SerializationContext serializationContext, + CompressionCodec codec, + List segments, + int mergeFactor, int inMemSegments, Path tmpDir, + RawComparator comparator, Progressable reporter, + boolean sortSegments, + TezCounter readsCounter, + TezCounter writesCounter, + TezCounter bytesReadCounter, + Progress mergePhase) + throws IOException, InterruptedException { + return new MergeQueue(conf, fs, segments, comparator, reporter, + sortSegments, codec, false).merge(serializationContext, + mergeFactor, inMemSegments, + tmpDir, + readsCounter, writesCounter, + bytesReadCounter, + mergePhase); + } + + public static void writeFile(TezRawKeyValueIterator records, Writer writer, - Progressable progressable, long recordsBeforeProgress) - throws IOException, InterruptedException { + Progressable progressable, long recordsBeforeProgress) + throws IOException, InterruptedException { long recordCtr = 0; long count = 0; - while(records.next()) { + while (records.next()) { if (records.isSameKey()) { writer.append(IFile.REPEAT_KEY, records.getValue()); count++; } else { writer.append(records.getKey(), records.getValue()); } - + if (((recordCtr++) % recordsBeforeProgress) == 0) { progressable.progress(); if (Thread.currentThread().isInterrupted()) { - /** + /* * Takes care DefaultSorter.mergeParts, MergeManager's merger threads, * PipelinedSorter's flush(). This is not expensive check as it is carried out every * 10000 records or so. 
*/ throw new InterruptedException("Current thread=" + Thread.currentThread().getName() + " got " - + "interrupted"); + + "interrupted"); } } } @@ -235,7 +250,7 @@ static class KeyValueBuffer { private int position; private int length; - public KeyValueBuffer(byte buf[], int position, int length) { + KeyValueBuffer(byte[] buf, int position, int length) { reset(buf, position, length); } @@ -264,7 +279,7 @@ public static class Segment { static final byte[] EMPTY_BYTES = new byte[0]; Reader reader = null; final KeyValueBuffer key = new KeyValueBuffer(EMPTY_BYTES, 0, 0); - TezCounter mapOutputsCounter = null; + private TezCounter mapOutputsCounter; public Segment(Reader reader, TezCounter mapOutputsCounter) { this.reader = reader; @@ -343,15 +358,17 @@ void reinitReader(int offset) throws IOException { @InterfaceStability.Unstable public static class DiskSegment extends Segment { - FileSystem fs = null; - Path file = null; - boolean preserve = false; // Signifies whether the segment should be kept after a merge is complete. Checked in the close method. - CompressionCodec codec = null; - long segmentOffset = 0; - long segmentLength = -1; + private FileSystem fs; + private Path file; + + // Signifies whether the segment should be kept after a merge is complete. Checked in the close method. + private boolean preserve; + private CompressionCodec codec; + private long segmentOffset; + private long segmentLength; boolean ifileReadAhead; int ifileReadAheadLength; - int bufferSize = -1; + private int bufferSize; public DiskSegment(FileSystem fs, Path file, CompressionCodec codec, boolean ifileReadAhead, @@ -362,19 +379,19 @@ public DiskSegment(FileSystem fs, Path file, } public DiskSegment(FileSystem fs, Path file, - CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLenth, + CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, int bufferSize, boolean preserve, TezCounter mergedMapOutputsCounter) throws IOException { this(fs, file, 0, fs.getFileStatus(file).getLen(), codec, - ifileReadAhead, ifileReadAheadLenth, bufferSize, preserve, + ifileReadAhead, ifileReadAheadLength, bufferSize, preserve, mergedMapOutputsCounter); } public DiskSegment(FileSystem fs, Path file, long segmentOffset, long segmentLength, CompressionCodec codec, boolean ifileReadAhead, - int ifileReadAheadLength, int bufferSize, - boolean preserve) throws IOException { + int ifileReadAheadLength, int bufferSize, + boolean preserve) { this(fs, file, segmentOffset, segmentLength, codec, ifileReadAhead, ifileReadAheadLength, bufferSize, preserve, null); } @@ -382,8 +399,7 @@ public DiskSegment(FileSystem fs, Path file, public DiskSegment(FileSystem fs, Path file, long segmentOffset, long segmentLength, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, int bufferSize, - boolean preserve, TezCounter mergedMapOutputsCounter) - throws IOException { + boolean preserve, TezCounter mergedMapOutputsCounter) { super(null, mergedMapOutputsCounter); this.fs = fs; this.file = file; @@ -445,7 +461,7 @@ void reinitReader(int offset) throws IOException { } @VisibleForTesting - static class MergeQueue + static class MergeQueue extends PriorityQueue implements TezRawKeyValueIterator { final Configuration conf; final FileSystem fs; @@ -455,9 +471,9 @@ static class MergeQueue static final int ifileReadAheadLength = TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT; static final int ifileBufferSize = TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT; static final long 
recordsBeforeProgress = TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT; - - List segments = new ArrayList(); - + + private List segments = new ArrayList<>(); + final RawComparator comparator; private long totalBytesProcessed; @@ -469,14 +485,14 @@ static class MergeQueue private final boolean considerFinalMergeForProgress; final Progressable reporter; - + final DataInputBuffer key = new DataInputBuffer(); final DataInputBuffer value = new DataInputBuffer(); final DataInputBuffer nextKey = new DataInputBuffer(); final DataInputBuffer diskIFileValue = new DataInputBuffer(); - + Segment minSegment; - Comparator segmentComparator = + Comparator segmentComparator = new Comparator() { public int compare(Segment o1, Segment o2) { if (o1.getLength() == o2.getLength()) { @@ -490,13 +506,13 @@ public int compare(Segment o1, Segment o2) { KeyState hasNext; DataOutputBuffer prevKey = new DataOutputBuffer(); - public MergeQueue(Configuration conf, FileSystem fs, + public MergeQueue(Configuration conf, FileSystem fs, Path[] inputs, boolean deleteInputs, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, int ifileBufferSize, boolean considerFinalMergeForProgress, - RawComparator comparator, Progressable reporter, - TezCounter mergedMapOutputsCounter) + RawComparator comparator, Progressable reporter, + TezCounter mergedMapOutputsCounter) throws IOException { this.conf = conf; this.checkForSameKeys = true; @@ -508,23 +524,23 @@ public MergeQueue(Configuration conf, FileSystem fs, this.comparator = comparator; this.reporter = reporter; this.considerFinalMergeForProgress = considerFinalMergeForProgress; - + for (Path file : inputs) { if (LOG.isTraceEnabled()) { LOG.trace("MergeQ: adding: " + file); } segments.add(new DiskSegment(fs, file, codec, ifileReadAhead, ifileReadAheadLength, ifileBufferSize, - !deleteInputs, + !deleteInputs, (file.toString().endsWith( - Constants.MERGED_OUTPUT_PREFIX) ? + Constants.MERGED_OUTPUT_PREFIX) ? null : mergedMapOutputsCounter))); } - + // Sort segments on file-lengths - Collections.sort(segments, segmentComparator); + segments.sort(segmentComparator); } - + public MergeQueue(Configuration conf, FileSystem fs, List segments, RawComparator comparator, Progressable reporter, boolean sortSegments, boolean considerFinalMergeForProgress) { @@ -536,7 +552,7 @@ public MergeQueue(Configuration conf, FileSystem fs, List segments, RawComparator comparator, Progressable reporter, boolean sortSegments, CompressionCodec codec, boolean considerFinalMergeForProgress) { - this(conf, fs, segments, comparator, reporter, sortSegments, null, + this(conf, fs, segments, comparator, reporter, sortSegments, codec, considerFinalMergeForProgress, true); } @@ -551,7 +567,7 @@ public MergeQueue(Configuration conf, FileSystem fs, this.reporter = reporter; this.considerFinalMergeForProgress = considerFinalMergeForProgress; if (sortSegments) { - Collections.sort(segments, segmentComparator); + segments.sort(segmentComparator); } this.checkForSameKeys = checkForSameKeys; this.codec = codec; @@ -581,7 +597,7 @@ private void adjustPriorityQueue(Segment reader) throws IOException{ long startPos = reader.getPosition(); if (checkForSameKeys) { if (hasNext == null) { - /** + /* * hasNext can be null during first iteration & prevKey is initialized here. * In cases of NO_KEY/NEW_KEY, we readjust the queue later. If new segment/file is found * during this process, we need to compare keys for RLE across segment boundaries. 
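// ---------------------------------------------------------------------------
// Editor's note (illustrative, not part of the patch): checkForSameKeys drives
// run-length encoding of duplicate keys in the IFile output. writeFile() above
// consumes it via isSameKey(): when the merged stream reports the same key
// again, only the value is appended under the IFile.REPEAT_KEY marker, so the
// key bytes are written once per run rather than once per record:
//
//   if (records.isSameKey()) {
//     writer.append(IFile.REPEAT_KEY, records.getValue());  // key elided (RLE)
//   } else {
//     writer.append(records.getKey(), records.getValue());  // new key run
//   }
// ---------------------------------------------------------------------------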
@@ -591,7 +607,7 @@ private void adjustPriorityQueue(Segment reader) throws IOException{ } else { //indicates a key has been read already if (hasNext != KeyState.SAME_KEY) { - /** + /* * Store previous key before reading next for later key comparisons. * If all keys in a segment are unique, it would always hit this code path and key copies * are wasteful in such condition, as these comparisons are mainly done for RLE. @@ -620,9 +636,6 @@ private void adjustPriorityQueue(Segment reader) throws IOException{ /** * Check if the previous key is same as the next top segment's key. * This would be useful to compute whether same key is spread across multiple segments. - * - * @param current - * @throws IOException */ void compareKeyWithNextTopKey(Segment current) throws IOException { Segment nextTop = top(); @@ -638,19 +651,10 @@ void compareKeyWithNextTopKey(Segment current) throws IOException { } public boolean next() throws IOException { - if (size() == 0) + if (!hasNext()) { return false; - - if (minSegment != null) { - //minSegment is non-null for all invocations of next except the first - //one. For the first invocation, the priority queue is ready for use - //but for the subsequent invocations, first adjust the queue - adjustPriorityQueue(minSegment); - if (size() == 0) { - minSegment = null; - return false; - } } + minSegment = top(); long startPos = minSegment.getPosition(); KeyValueBuffer nextKey = minSegment.getKey(); @@ -662,7 +666,7 @@ public boolean next() throws IOException { //the same byte[] since it would corrupt the data in the inmem //segment. So we maintain an explicit DIB for value bytes //obtained from disk, and if the current segment is a disk - //segment, we reset the "value" DIB to the byte[] in that (so + //segment, we reset the "value" DIB to the byte[] in that (so //we reuse the disk segment DIB whenever we consider //a disk segment). 
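// ---------------------------------------------------------------------------
// Editor's note (illustrative, not part of the patch): with the priority-queue
// adjustment moved into hasNext(), next() reduces to "if (!hasNext()) return
// false; minSegment = top(); ...". Callers can therefore probe for remaining
// data before allocating a Writer and then drain with next(), which is the
// pattern PipelinedSorter.spill() adopts earlier in this patch:
//
//   if (kvIter.hasNext() || !sendEmptyPartitionDetails) {
//     writer = ...;                        // created only when needed
//   }
//   while (kvIter.next()) {                // advances and loads key/value
//     writer.append(kvIter.getKey(), kvIter.getValue());
//   }
// ---------------------------------------------------------------------------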
minSegment.getValue(diskIFileValue); @@ -693,23 +697,23 @@ protected boolean lessThan(Object a, Object b) { int s1 = key1.getPosition(); int l1 = key1.getLength(); int s2 = key2.getPosition(); - int l2 = key2.getLength();; + int l2 = key2.getLength(); return comparator.compare(key1.getData(), s1, l1, key2.getData(), s2, l2) < 0; } - - public TezRawKeyValueIterator merge(Class keyClass, Class valueClass, - int factor, Path tmpDir, - TezCounter readsCounter, - TezCounter writesCounter, - TezCounter bytesReadCounter, - Progress mergePhase) + + public TezRawKeyValueIterator merge(SerializationContext serializationContext, + int factor, Path tmpDir, + TezCounter readsCounter, + TezCounter writesCounter, + TezCounter bytesReadCounter, + Progress mergePhase) throws IOException, InterruptedException { - return merge(keyClass, valueClass, factor, 0, tmpDir, + return merge(serializationContext, factor, 0, tmpDir, readsCounter, writesCounter, bytesReadCounter, mergePhase); } - TezRawKeyValueIterator merge(Class keyClass, Class valueClass, + TezRawKeyValueIterator merge(SerializationContext serializationContext, int factor, int inMem, Path tmpDir, TezCounter readsCounter, TezCounter writesCounter, @@ -741,7 +745,7 @@ TezRawKeyValueIterator merge(Class keyClass, Class valueClass, if (totalBytes != 0) { progPerByte = 1.0f / (float)totalBytes; } - + //create the MergeStreams from the sorted map created in the constructor //and dump the final output to a file do { @@ -753,14 +757,14 @@ TezRawKeyValueIterator merge(Class keyClass, Class valueClass, factor += inMem; } List segmentsToMerge = - new ArrayList(); + new ArrayList<>(); int segmentsConsidered = 0; int numSegmentsToConsider = factor; long startBytes = 0; // starting bytes of segments of this merge while (true) { - //extract the smallest 'factor' number of segments + //extract the smallest 'factor' number of segments //Call cleanup on the empty segments (no key/value data) - List mStream = + List mStream = getSegmentDescriptors(numSegmentsToConsider); for (Segment segment : mStream) { // Initialize the segment at the last possible moment; @@ -770,7 +774,7 @@ TezRawKeyValueIterator merge(Class keyClass, Class valueClass, long startPos = segment.getPosition(); boolean hasNext = segment.nextRawKey(nextKey); long endPos = segment.getPosition(); - + if (hasNext) { startBytes += endPos - startPos; segmentsToMerge.add(segment); @@ -783,7 +787,7 @@ TezRawKeyValueIterator merge(Class keyClass, Class valueClass, } //if we have the desired number of segments //or looked at all available segments, we break - if (segmentsConsidered == factor || + if (segmentsConsidered == factor || segments.size() == 0) { break; } @@ -791,14 +795,14 @@ TezRawKeyValueIterator merge(Class keyClass, Class valueClass, // Get the correct # of segments in case some of them were empty. numSegmentsToConsider = factor - segmentsConsidered; } - + //feed the streams to the priority queue initialize(segmentsToMerge.size()); clear(); for (Segment segment : segmentsToMerge) { put(segment); } - + //if we have lesser number of segments remaining, then just return the //iterator, else do another single level merge if (numSegments <= factor) { // Will always kick in if only in-mem segments are provided. @@ -810,13 +814,13 @@ TezRawKeyValueIterator merge(Class keyClass, Class valueClass, // the 3rd phase of reduce task. 
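// ---------------------------------------------------------------------------
// Editor's worked example (illustrative, not part of the patch): merge progress
// is byte-driven. progPerByte = 1.0f / totalBytes, and the phase progress is
// totalBytesProcessed * progPerByte; e.g. with totalBytes = 2 GiB and 512 MiB
// already merged, mergeProgress is set to 0.25. considerFinalMergeForProgress
// controls whether the final merge's input bytes are counted in totalBytes.
// ---------------------------------------------------------------------------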
totalBytesProcessed = 0; totalBytes = 0; - for (int i = 0; i < segmentsToMerge.size(); i++) { - totalBytes += segmentsToMerge.get(i).getLength(); + for (Segment segment : segmentsToMerge) { + totalBytes += segment.getLength(); } } if (totalBytes != 0) //being paranoid progPerByte = 1.0f / (float)totalBytes; - + totalBytesProcessed += startBytes; if (totalBytes != 0) mergeProgress.set(totalBytesProcessed * progPerByte); @@ -839,40 +843,41 @@ TezRawKeyValueIterator merge(Class keyClass, Class valueClass, " intermediate segments out of a total of " + (segments.size() + segmentsToMerge.size())); } - + long bytesProcessedInPrevMerges = totalBytesProcessed; totalBytesProcessed += startBytes; - //we want to spread the creation of temp files on multiple disks if + //we want to spread the creation of temp files on multiple disks if //available under the space constraints - long approxOutputSize = 0; + long approxOutputSize = 0; for (Segment s : segmentsToMerge) { - approxOutputSize += s.getLength() + + approxOutputSize += s.getLength() + ChecksumFileSystem.getApproxChkSumLength( s.getLength()); } - Path tmpFilename = + Path tmpFilename = new Path(tmpDir, "intermediate").suffix("." + passNo); - Path outputFile = lDirAlloc.getLocalPathForWrite( + Path outputFile = L_DIR_ALLOC.getLocalPathForWrite( tmpFilename.toString(), approxOutputSize, conf); // TODO Would it ever make sense to make this an in-memory writer ? // Merging because of too many disk segments - might fit in memory. - Writer writer = - new Writer(conf, fs, outputFile, keyClass, valueClass, codec, - writesCounter, null); + Writer writer = new Writer(serializationContext.getKeySerialization(), + serializationContext.getValSerialization(), fs, outputFile, + serializationContext.getKeyClass(), serializationContext.getValueClass(), codec, + writesCounter, null); writeFile(this, writer, reporter, recordsBeforeProgress); writer.close(); - - //we finished one single level merge; now clean up the priority + + //we finished one single level merge; now clean up the priority //queue this.close(); // Add the newly create segment to the list of segments to be merged - Segment tempSegment = + Segment tempSegment = new DiskSegment(fs, outputFile, codec, ifileReadAhead, ifileReadAheadLength, ifileBufferSize, false); @@ -885,8 +890,8 @@ TezRawKeyValueIterator merge(Class keyClass, Class valueClass, } segments.add(pos, tempSegment); numSegments = segments.size(); - - // Subtract the difference between expected size of new segment and + + // Subtract the difference between expected size of new segment and // actual size of new segment(Expected size of new segment is // inputBytesOfThisMerge) from totalBytes. Expected size and actual // size will match(almost) if combiner is not called in merge. @@ -896,15 +901,15 @@ TezRawKeyValueIterator merge(Class keyClass, Class valueClass, if (totalBytes != 0) { progPerByte = 1.0f / (float)totalBytes; } - + passNo++; } - //we are worried about only the first pass merge factor. So reset the + //we are worried about only the first pass merge factor. So reset the //factor to what it originally was factor = origFactor; } while(true); } - + /** * Determine the number of segments to merge in a given pass. Assuming more * than factor segments, the first pass should attempt to bring the total @@ -913,20 +918,22 @@ TezRawKeyValueIterator merge(Class keyClass, Class valueClass, */ private static int getPassFactor(int factor, int passNo, int numSegments) { // passNo > 1 in the OR list - is that correct ? 
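// ---------------------------------------------------------------------------
// Editor's worked example (illustrative, not part of the patch): with
// factor = 10 and numSegments = 40, mod = (40 - 1) % (10 - 1) = 3, so the first
// pass merges only mod + 1 = 4 segments. That leaves 40 - 4 + 1 = 37 segments,
// and (37 - 1) is divisible by (factor - 1) = 9, so every later pass is a full
// 10-way merge: 37 -> 28 -> 19 -> 10 -> 1. Trimming only the first pass keeps
// the number of times each byte is re-read and re-written to a minimum.
// ---------------------------------------------------------------------------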
- if (passNo > 1 || numSegments <= factor || factor == 1) + if (passNo > 1 || numSegments <= factor || factor == 1) { return factor; + } int mod = (numSegments - 1) % (factor - 1); - if (mod == 0) + if (mod == 0) { return factor; + } return mod + 1; } - + /** Return (& remove) the requested number of segment descriptors from the * sorted map. */ private List getSegmentDescriptors(int numDescriptors) { if (numDescriptors > segments.size()) { - List subList = new ArrayList(segments); + List subList = new ArrayList<>(segments); segments.clear(); return subList; } @@ -937,7 +944,7 @@ private List getSegmentDescriptors(int numDescriptors) { subList.clear(); return subListCopy; } - + /** * Compute expected size of input bytes to merges, will be used in * calculating mergeProgress. This simulates the above merge() method and @@ -956,13 +963,13 @@ static long computeBytesInMerges(List segments, int factor, int inMem, // factor for 1st pass int f = getPassFactor(factor, 1, n) + inMem; n = numSegments; - + for (int i = 0; i < numSegments; i++) { // Not handling empty segments here assuming that it would not affect // much in calculation of mergeProgress. segmentSizes[i] = segments.get(i).getLength(); } - + // If includeFinalMerge is true, allow the following while loop iterate // for 1 more iteration. This is to include final merge as part of the // computation of expected input bytes of merges @@ -979,7 +986,7 @@ static long computeBytesInMerges(List segments, int factor, int inMem, mergedSize += segmentSizes[offset + j]; } totalBytes += mergedSize; - + // insert new size into the sorted list int pos = Arrays.binarySearch(segmentSizes, offset, offset + n, mergedSize); if (pos < 0) { @@ -1036,6 +1043,24 @@ public boolean isSameKey() throws IOException { return (hasNext != null) && (hasNext == KeyState.SAME_KEY); } + public boolean hasNext() throws IOException { + if (size() == 0) + return false; + + if (minSegment != null) { + //minSegment is non-null for all invocations of next except the first + //one. For the first invocation, the priority queue is ready for use + //but for the subsequent invocations, first adjust the queue + adjustPriorityQueue(minSegment); + if (size() == 0) { + minSegment = null; + return false; + } + } + + return true; + } + } private static class EmptyIterator implements TezRawKeyValueIterator { @@ -1060,6 +1085,11 @@ public boolean next() throws IOException { return false; } + @Override + public boolean hasNext() throws IOException { + return false; + } + @Override public void close() throws IOException { } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezRawKeyValueIterator.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezRawKeyValueIterator.java index 4e2ce3a7f0..683c9b9ce9 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezRawKeyValueIterator.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezRawKeyValueIterator.java @@ -55,7 +55,16 @@ public interface TezRawKeyValueIterator { * @throws IOException */ boolean next() throws IOException; - + + /** + * Returns true if any items are left in the iterator. + * + * @return true if a call to next will succeed + * false otherwise. + * @throws IOException + */ + boolean hasNext() throws IOException; + /** * Closes the iterator so that the underlying streams can be closed. 
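// ---------------------------------------------------------------------------
// Editor's note (illustrative, not part of the patch): the hasNext() contract
// added above is that it returns true exactly when a subsequent next() would
// return true, without consuming a record. The simplest conforming shape,
// mirroring MRResultIterator later in this patch:
//
//   private int current = start - 1;           // next() advances to current + 1
//   public boolean next()    { return ++current < end; }
//   public boolean hasNext() { return (current + 1) < end; }
// ---------------------------------------------------------------------------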
* diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezSpillRecord.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezSpillRecord.java index ab4142b51d..feed70f496 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezSpillRecord.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/TezSpillRecord.java @@ -28,13 +28,17 @@ import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.PureJavaCrc32; +import org.apache.hadoop.util.functional.FutureIO; import org.apache.tez.runtime.library.common.Constants; public class TezSpillRecord { + public static final FsPermission SPILL_FILE_PERMS = new FsPermission((short) 0640); /** Backing store */ private final ByteBuffer buf; @@ -47,25 +51,27 @@ public TezSpillRecord(int numPartitions) { entries = buf.asLongBuffer(); } - public TezSpillRecord(Path indexFileName, Configuration job) throws IOException { - this(indexFileName, job, null); + public TezSpillRecord(Path indexFileName, Configuration conf) throws IOException { + this(indexFileName, FileSystem.getLocal(conf).getRaw()); } - public TezSpillRecord(Path indexFileName, Configuration job, String expectedIndexOwner) + public TezSpillRecord(Path indexFileName, FileSystem fs) throws IOException { + this(indexFileName, fs, null); + } + + public TezSpillRecord(Path indexFileName, FileSystem fs, String expectedIndexOwner) throws IOException { - this(indexFileName, job, new PureJavaCrc32(), expectedIndexOwner); + this(indexFileName, fs, new PureJavaCrc32(), expectedIndexOwner); } - public TezSpillRecord(Path indexFileName, Configuration job, Checksum crc, + public TezSpillRecord(Path indexFileName, FileSystem rfs, Checksum crc, String expectedIndexOwner) throws IOException { - final FileSystem rfs = FileSystem.getLocal(job).getRaw(); - final FSDataInputStream in = rfs.open(indexFileName); - try { - final long length = rfs.getFileStatus(indexFileName).getLen(); - final int partitions = - (int) length / Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH; + FileStatus fileStatus = rfs.getFileStatus(indexFileName); + final long length = fileStatus.getLen(); + try (FSDataInputStream in = FutureIO.awaitFuture(rfs.openFile(indexFileName).withFileStatus(fileStatus).build())) { + final int partitions = (int) length / Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH; final int size = partitions * Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH; buf = ByteBuffer.allocate(size); @@ -74,15 +80,12 @@ public TezSpillRecord(Path indexFileName, Configuration job, Checksum crc, CheckedInputStream chk = new CheckedInputStream(in, crc); IOUtils.readFully(chk, buf.array(), 0, size); if (chk.getChecksum().getValue() != in.readLong()) { - throw new ChecksumException("Checksum error reading spill index: " + - indexFileName, -1); + throw new ChecksumException("Checksum error reading spill index: " + indexFileName, -1); } } else { IOUtils.readFully(in, buf.array(), 0, size); } entries = buf.asLongBuffer(); - } finally { - in.close(); } } @@ -115,14 +118,12 @@ public void putIndex(TezIndexRecord rec, int partition) { /** * Write this spill record to the location provided. 
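// ---------------------------------------------------------------------------
// Editor's note (illustrative, not part of the patch): TezSpillRecord now takes
// the FileSystem explicitly on both the read and write paths; call sites in
// this patch thread the task's cached raw local filesystem through instead of
// re-resolving FileSystem.getLocal(conf).getRaw() on every spill:
//
//   spillRec.writeToFile(indexFilename, conf, localFs);
//   TezSpillRecord rec = new TezSpillRecord(finalIndexFile, localFs);
//
// The read path above also switches to the openFile() builder and hands it the
// already-fetched FileStatus, letting object-store filesystems skip a second
// getFileStatus() round trip.
// ---------------------------------------------------------------------------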
*/ - public void writeToFile(Path loc, Configuration job) - throws IOException { - writeToFile(loc, job, new PureJavaCrc32()); + public void writeToFile(Path loc, Configuration job, FileSystem fs) throws IOException { + writeToFile(loc, job, fs, new PureJavaCrc32()); } - public void writeToFile(Path loc, Configuration job, Checksum crc) + public void writeToFile(Path loc, Configuration job, FileSystem rfs, Checksum crc) throws IOException { - final FileSystem rfs = FileSystem.getLocal(job).getRaw(); CheckedOutputStream chk = null; final FSDataOutputStream out = rfs.create(loc); try { @@ -140,7 +141,13 @@ public void writeToFile(Path loc, Configuration job, Checksum crc) } else { out.close(); } + ensureSpillFilePermissions(loc, rfs); } } + public static void ensureSpillFilePermissions(Path loc, FileSystem rfs) throws IOException { + if (!SPILL_FILE_PERMS.equals(SPILL_FILE_PERMS.applyUMask(FsPermission.getUMask(rfs.getConf())))) { + rfs.setPermission(loc, SPILL_FILE_PERMS); + } + } } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java index 268e237afe..6354c7cc41 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/dflt/DefaultSorter.java @@ -61,7 +61,9 @@ import org.apache.tez.runtime.library.common.sort.impl.TezMerger.DiskSegment; import org.apache.tez.runtime.library.common.sort.impl.TezMerger.Segment; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; + +import static org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord.ensureSpillFilePermissions; @SuppressWarnings({"unchecked", "rawtypes"}) public final class DefaultSorter extends ExternalSorter implements IndexedSortable { @@ -151,7 +153,7 @@ public DefaultSorter(OutputContext outputContext, Configuration conf, int numOut .TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED_DEFAULT); if (confPipelinedShuffle) { - LOG.warn(outputContext.getDestinationVertexName() + ": " + + LOG.warn(outputContext.getInputOutputVertexNames() + ": " + TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED + " does not work " + "with DefaultSorter. 
It is supported only with PipelinedSorter."); } @@ -247,14 +249,14 @@ public void write(Object key, Object value) synchronized void collect(Object key, Object value, final int partition ) throws IOException { - if (key.getClass() != keyClass) { + if (key.getClass() != serializationContext.getKeyClass()) { throw new IOException("Type mismatch in key from map: expected " - + keyClass.getName() + ", received " + + serializationContext.getKeyClass().getName() + ", received " + key.getClass().getName()); } - if (value.getClass() != valClass) { + if (value.getClass() != serializationContext.getValueClass()) { throw new IOException("Type mismatch in value from map: expected " - + valClass.getName() + ", received " + + serializationContext.getValueClass().getName() + ", received " + value.getClass().getName()); } if (partition < 0 || partition >= partitions) { @@ -369,7 +371,8 @@ synchronized void collect(Object key, Object value, final int partition kvindex = (int)(((long)kvindex - NMETA + kvmeta.capacity()) % kvmeta.capacity()); totalKeys++; } catch (MapBufferTooSmallException e) { - LOG.info(outputContext.getDestinationVertexName() + ": Record too large for in-memory buffer: " + e.getMessage()); + LOG.info( + outputContext.getInputOutputVertexNames() + ": Record too large for in-memory buffer: " + e.getMessage()); spillSingleRecord(key, value, partition); mapOutputRecordCounter.increment(1); return; @@ -388,7 +391,7 @@ private void setEquator(int pos) { // Cast one of the operands to long to avoid integer overflow kvindex = (int) (((long) aligned - METASIZE + kvbuffer.length) % kvbuffer.length) / 4; if (LOG.isInfoEnabled()) { - LOG.info(outputContext.getDestinationVertexName() + ": " + "(EQUATOR) " + pos + " kvi " + kvindex + + LOG.info(outputContext.getInputOutputVertexNames() + ": " + "(EQUATOR) " + pos + " kvi " + kvindex + "(" + (kvindex * 4) + ")"); } } @@ -406,7 +409,7 @@ private void resetSpill() { // Cast one of the operands to long to avoid integer overflow kvstart = kvend = (int) (((long) aligned - METASIZE + kvbuffer.length) % kvbuffer.length) / 4; if (LOG.isInfoEnabled()) { - LOG.info(outputContext.getDestinationVertexName() + ": " + "(RESET) equator " + e + " kv " + kvstart + "(" + + LOG.info(outputContext.getInputOutputVertexNames() + ": " + "(RESET) equator " + e + " kv " + kvstart + "(" + (kvstart * 4) + ")" + " kvi " + kvindex + "(" + (kvindex * 4) + ")"); } } @@ -662,7 +665,7 @@ void interruptSpillThread() throws IOException { spillThread.interrupt(); spillThread.join(); } catch (InterruptedException e) { - LOG.info(outputContext.getDestinationVertexName() + ": " + "Spill thread interrupted"); + LOG.info(outputContext.getInputOutputVertexNames() + ": " + "Spill thread interrupted"); //Reset status Thread.currentThread().interrupt(); throw new IOInterruptedException("Spill failed", e); @@ -671,7 +674,7 @@ void interruptSpillThread() throws IOException { @Override public void flush() throws IOException { - LOG.info(outputContext.getDestinationVertexName() + ": " + "Starting flush of map output"); + LOG.info(outputContext.getInputOutputVertexNames() + ": " + "Starting flush of map output"); outputContext.notifyProgress(); if (Thread.currentThread().isInterrupted()) { /** @@ -708,7 +711,7 @@ public void flush() throws IOException { bufend = bufmark; if (LOG.isInfoEnabled()) { LOG.info( - outputContext.getDestinationVertexName() + ": " + "Sorting & Spilling map output. " + outputContext.getInputOutputVertexNames() + ": " + "Sorting & Spilling map output. 
" + "bufstart = " + bufstart + ", bufend = " + bufmark + ", bufvoid = " + bufvoid + "; " + "kvstart=" + kvstart + "(" + (kvstart * 4) + ")" + ", kvend = " + kvend + "(" + (kvend * 4) + ")" @@ -750,10 +753,10 @@ public void flush() throws IOException { } @Override - public void close() throws IOException { - super.close(); + public List close() throws IOException { kvbuffer = null; kvmeta = null; + return super.close(); } boolean isClosed() { @@ -779,7 +782,7 @@ public void run() { spillLock.unlock(); sortAndSpill(sameKeyCount, totalKeysCount); } catch (Throwable t) { - LOG.warn(outputContext.getDestinationVertexName() + ": " + "Got an exception in sortAndSpill", t); + LOG.warn(outputContext.getInputOutputVertexNames() + ": " + "Got an exception in sortAndSpill", t); sortSpillException = t; } finally { spillLock.lock(); @@ -792,7 +795,7 @@ public void run() { } } } catch (InterruptedException e) { - LOG.info(outputContext.getDestinationVertexName() + ": " + "Spill thread interrupted"); + LOG.info(outputContext.getInputOutputVertexNames() + ": " + "Spill thread interrupted"); Thread.currentThread().interrupt(); } finally { spillLock.unlock(); @@ -828,7 +831,7 @@ private void startSpill() { bufend = bufmark; spillInProgress = true; if (LOG.isInfoEnabled()) { - LOG.info(outputContext.getDestinationVertexName() + ": Spilling map output." + LOG.info(outputContext.getInputOutputVertexNames() + ": Spilling map output." + "bufstart=" + bufstart + ", bufend = " + bufmark + ", bufvoid = " + bufvoid +"; kvstart=" + kvstart + "(" + (kvstart * 4) + ")" +", kvend = " + kvend + "(" + (kvend * 4) + ")" @@ -893,6 +896,7 @@ protected void spill(int mstart, int mend, long sameKeyCount, long totalKeysCoun mapOutputFile.getSpillFileForWrite(numSpills, size); spillFilePaths.put(numSpills, filename); out = rfs.create(filename); + ensureSpillFilePermissions(filename, rfs); int spindex = mstart; final InMemValBytes value = createInMemValBytes(); @@ -903,8 +907,9 @@ protected void spill(int mstart, int mend, long sameKeyCount, long totalKeysCoun long segmentStart = out.getPos(); if (spindex < mend && kvmeta.get(offsetFor(spindex) + PARTITION) == i || !sendEmptyPartitionDetails) { - writer = new Writer(conf, out, keyClass, valClass, codec, - spilledRecordsCounter, null, rle); + writer = new Writer(serializationContext.getKeySerialization(), + serializationContext.getValSerialization(), out, serializationContext.getKeyClass(), + serializationContext.getValueClass(), codec, spilledRecordsCounter, null, rle); } if (combiner == null) { // spill directly @@ -932,7 +937,7 @@ protected void spill(int mstart, int mend, long sameKeyCount, long totalKeysCoun TezRawKeyValueIterator kvIter = new MRResultIterator(spstart, spindex); if (LOG.isDebugEnabled()) { - LOG.debug(outputContext.getDestinationVertexName() + ": " + "Running combine processor"); + LOG.debug(outputContext.getInputOutputVertexNames() + ": " + "Running combine processor"); } runCombineProcessor(kvIter, writer); } @@ -951,7 +956,7 @@ protected void spill(int mstart, int mend, long sameKeyCount, long totalKeysCoun new TezIndexRecord(segmentStart, rawLength, partLength); spillRec.putIndex(rec, i); if (!isFinalMergeEnabled() && reportPartitionStats() && writer != null) { - partitionStats[i] += partLength; + partitionStats[i] += rawLength; } writer = null; } finally { @@ -965,13 +970,14 @@ protected void spill(int mstart, int mend, long sameKeyCount, long totalKeysCoun mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH); 
spillFileIndexPaths.put(numSpills, indexFilename); - spillRec.writeToFile(indexFilename, conf); + spillRec.writeToFile(indexFilename, conf, localFs); } else { indexCacheList.add(spillRec); totalIndexCacheMemory += spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH; } - LOG.info(outputContext.getDestinationVertexName() + ": " + "Finished spill " + numSpills); + LOG.info(outputContext.getInputOutputVertexNames() + ": " + "Finished spill " + numSpills + + " at " + filename.toString()); ++numSpills; if (!isFinalMergeEnabled()) { numShuffleChunks.setValue(numSpills); @@ -1000,6 +1006,7 @@ private void spillSingleRecord(final Object key, final Object value, mapOutputFile.getSpillFileForWrite(numSpills, size); spillFilePaths.put(numSpills, filename); out = rfs.create(filename); + ensureSpillFilePermissions(filename, rfs); // we don't run the combiner for a single record for (int i = 0; i < partitions; ++i) { @@ -1008,8 +1015,9 @@ private void spillSingleRecord(final Object key, final Object value, long segmentStart = out.getPos(); // Create a new codec, don't care! if (!sendEmptyPartitionDetails || (i == partition)) { - writer = new Writer(conf, out, keyClass, valClass, codec, - spilledRecordsCounter, null, false); + writer = new Writer(serializationContext.getKeySerialization(), + serializationContext.getValSerialization(), out, serializationContext.getKeyClass(), + serializationContext.getValueClass(), codec, spilledRecordsCounter, null, false); } if (i == partition) { final long recordStart = out.getPos(); @@ -1043,7 +1051,7 @@ private void spillSingleRecord(final Object key, final Object value, mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH); spillFileIndexPaths.put(numSpills, indexFilename); - spillRec.writeToFile(indexFilename, conf); + spillRec.writeToFile(indexFilename, conf, localFs); } else { indexCacheList.add(spillRec); totalIndexCacheMemory += @@ -1122,6 +1130,12 @@ public MRResultIterator(int start, int end) { this.end = end; current = start - 1; } + + @Override + public boolean hasNext() throws IOException { + return (current + 1) < end; + } + public boolean next() throws IOException { return ++current < end; } @@ -1159,7 +1173,7 @@ private void maybeSendEventForSpill(List events, boolean isLastEvent, outputContext, index, spillRecord, partitions, sendEmptyPartitionDetails, pathComponent, partitionStats, reportDetailedPartitionStats(), auxiliaryService, deflater); - LOG.info(outputContext.getDestinationVertexName() + ": " + + LOG.info(outputContext.getInputOutputVertexNames() + ": " + "Adding spill event for spill (final update=" + isLastEvent + "), spillId=" + index); if (sendEvent) { @@ -1177,13 +1191,13 @@ private void maybeAddEventsForSpills() throws IOException { TezSpillRecord spillRecord = indexCacheList.get(i); if (spillRecord == null) { //File was already written and location is stored in spillFileIndexPaths - spillRecord = new TezSpillRecord(spillFileIndexPaths.get(i), conf); + spillRecord = new TezSpillRecord(spillFileIndexPaths.get(i), localFs); } else { //Double check if this file has to be written if (spillFileIndexPaths.get(i) == null) { Path indexPath = mapOutputFile.getSpillIndexFileForWrite(i, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH); - spillRecord.writeToFile(indexPath, conf); + spillRecord.writeToFile(indexPath, conf, localFs); } } @@ -1212,10 +1226,10 @@ private void mergeParts() throws IOException, InterruptedException { sameVolRename(filename[0], finalOutputFile); if (indexCacheList.size() == 0) { 
sameVolRename(spillFileIndexPaths.get(0), finalIndexFile); - spillRecord = new TezSpillRecord(finalIndexFile, conf); + spillRecord = new TezSpillRecord(finalIndexFile, localFs); } else { spillRecord = indexCacheList.get(0); - spillRecord.writeToFile(finalIndexFile, conf); + spillRecord.writeToFile(finalIndexFile, conf, localFs); } } else { List events = Lists.newLinkedList(); @@ -1223,14 +1237,14 @@ private void mergeParts() throws IOException, InterruptedException { spillRecord = indexCacheList.get(0); Path indexPath = mapOutputFile.getSpillIndexFileForWrite(numSpills-1, partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH); - spillRecord.writeToFile(indexPath, conf); + spillRecord.writeToFile(indexPath, conf, localFs); maybeSendEventForSpill(events, true, spillRecord, 0, true); fileOutputByteCounter.increment(rfs.getFileStatus(spillFilePaths.get(0)).getLen()); //No need to populate finalIndexFile, finalOutputFile etc when finalMerge is disabled } if (spillRecord != null && reportPartitionStats()) { for(int i=0; i < spillRecord.size(); i++) { - partitionStats[i] += spillRecord.getIndex(i).getPartLength(); + partitionStats[i] += spillRecord.getIndex(i).getRawLength(); } } numShuffleChunks.setValue(numSpills); @@ -1240,7 +1254,7 @@ private void mergeParts() throws IOException, InterruptedException { // read in paged indices for (int i = indexCacheList.size(); i < numSpills; ++i) { Path indexFileName = spillFileIndexPaths.get(i); - indexCacheList.add(new TezSpillRecord(indexFileName, conf)); + indexCacheList.add(new TezSpillRecord(indexFileName, localFs)); } //Check if it is needed to do final merge. Or else, exit early. @@ -1267,6 +1281,7 @@ private void mergeParts() throws IOException, InterruptedException { //The output stream for the final single output file FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096); + ensureSpillFilePermissions(finalOutputFile, rfs); if (numSpills == 0) { // TODO Change event generation to say there is no data rather than generating a dummy file @@ -1279,7 +1294,10 @@ private void mergeParts() throws IOException, InterruptedException { long segmentStart = finalOut.getPos(); if (!sendEmptyPartitionDetails) { Writer writer = - new Writer(conf, finalOut, keyClass, valClass, codec, null, null); + new Writer(serializationContext.getKeySerialization(), + serializationContext.getValSerialization(), finalOut, + serializationContext.getKeyClass(), serializationContext.getValueClass(), codec, + null, null); writer.close(); rawLength = writer.getRawLength(); partLength = writer.getCompressedLength(); @@ -1290,7 +1308,7 @@ private void mergeParts() throws IOException, InterruptedException { outputBytesWithOverheadCounter.increment(rawLength); sr.putIndex(rec, i); } - sr.writeToFile(finalIndexFile, conf); + sr.writeToFile(finalIndexFile, conf, localFs); } finally { finalOut.close(); } @@ -1322,7 +1340,7 @@ private void mergeParts() throws IOException, InterruptedException { segmentList.add(s); } if (LOG.isDebugEnabled()) { - LOG.debug(outputContext.getDestinationVertexName() + ": " + LOG.debug(outputContext.getInputOutputVertexNames() + ": " + "TaskIdentifier=" + taskIdentifier + " Partition=" + parts + "Spill =" + i + "(" + indexRecord.getStartOffset() + "," + indexRecord.getRawLength() + ", " + @@ -1337,7 +1355,7 @@ private void mergeParts() throws IOException, InterruptedException { boolean sortSegments = segmentList.size() > mergeFactor; //merge TezRawKeyValueIterator kvIter = TezMerger.merge(conf, rfs, - keyClass, valClass, codec, + serializationContext, 
codec, segmentList, mergeFactor, new Path(taskIdentifier), (RawComparator)ConfigUtils.getIntermediateOutputKeyComparator(conf), @@ -1350,9 +1368,10 @@ private void mergeParts() throws IOException, InterruptedException { long rawLength = 0; long partLength = 0; if (shouldWrite) { - Writer writer = - new Writer(conf, finalOut, keyClass, valClass, codec, - spilledRecordsCounter, null); + Writer writer = new Writer(serializationContext.getKeySerialization(), + serializationContext.getValSerialization(), finalOut, + serializationContext.getKeyClass(), serializationContext.getValueClass(), codec, + spilledRecordsCounter, null); if (combiner == null || numSpills < minSpillsForCombine) { TezMerger.writeFile(kvIter, writer, progressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT); @@ -1369,11 +1388,11 @@ private void mergeParts() throws IOException, InterruptedException { new TezIndexRecord(segmentStart, rawLength, partLength); spillRec.putIndex(rec, parts); if (reportPartitionStats()) { - partitionStats[parts] += partLength; + partitionStats[parts] += rawLength; } } numShuffleChunks.setValue(1); //final merge has happened - spillRec.writeToFile(finalIndexFile, conf); + spillRec.writeToFile(finalIndexFile, conf, localFs); finalOut.close(); for(int i = 0; i < numSpills; i++) { rfs.delete(filename[i],true); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/task/local/output/TezTaskOutputFiles.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/task/local/output/TezTaskOutputFiles.java index 97a250913f..3fb90865d1 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/task/local/output/TezTaskOutputFiles.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/task/local/output/TezTaskOutputFiles.java @@ -20,7 +20,7 @@ import java.io.IOException; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -65,11 +65,7 @@ public TezTaskOutputFiles(Configuration conf, String uniqueId, int dagID) { * if service_id = tez_shuffle then "${appDir}/dagId/output/${uniqueId}" */ private Path getAttemptOutputDir() { - if (LOG.isDebugEnabled()) { - LOG.debug("getAttemptOutputDir: " - + Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR + "/" - + uniqueId); - } + LOG.debug("getAttemptOutputDir: {}/{}", Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR, uniqueId); String dagPath = getDagOutputDir(Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR); return new Path(dagPath, uniqueId); } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/BaseUnorderedPartitionedKVWriter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/BaseUnorderedPartitionedKVWriter.java index 30d1adb9fe..adea49fe80 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/BaseUnorderedPartitionedKVWriter.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/BaseUnorderedPartitionedKVWriter.java @@ -22,14 +22,15 @@ import java.util.Iterator; import java.util.List; +import org.apache.hadoop.io.serializer.Serialization; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.RawLocalFileSystem; import 
org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.DefaultCodec; import org.apache.hadoop.io.serializer.SerializationFactory; import org.apache.hadoop.io.serializer.Serializer; -import org.apache.hadoop.util.ReflectionUtils; import org.apache.tez.common.counters.TaskCounter; import org.apache.tez.common.counters.TezCounter; import org.apache.tez.runtime.api.Event; @@ -40,6 +41,7 @@ import org.apache.tez.runtime.library.common.ConfigUtils; import org.apache.tez.runtime.library.common.TezRuntimeUtils; import org.apache.tez.runtime.library.common.task.local.output.TezTaskOutput; +import org.apache.tez.runtime.library.utils.CodecUtils; @SuppressWarnings("rawtypes") public abstract class BaseUnorderedPartitionedKVWriter extends KeyValuesWriter { @@ -48,12 +50,15 @@ public abstract class BaseUnorderedPartitionedKVWriter extends KeyValuesWriter { protected final OutputContext outputContext; protected final Configuration conf; + protected final RawLocalFileSystem localFs; protected final Partitioner partitioner; protected final Class keyClass; protected final Class valClass; protected final Serializer keySerializer; protected final Serializer valSerializer; protected final SerializationFactory serializationFactory; + protected final Serialization keySerialization; + protected final Serialization valSerialization; protected final int numPartitions; protected final CompressionCodec codec; protected final TezTaskOutput outputFileHandler; @@ -101,18 +106,30 @@ public abstract class BaseUnorderedPartitionedKVWriter extends KeyValuesWriter { */ protected final TezCounter numAdditionalSpillsCounter; + /** + * Represents the number of bytes that is transmitted via the event. + */ + protected final TezCounter dataViaEventSize; + @SuppressWarnings("unchecked") public BaseUnorderedPartitionedKVWriter(OutputContext outputContext, Configuration conf, int numOutputs) { this.outputContext = outputContext; this.conf = conf; + try { + this.localFs = (RawLocalFileSystem) FileSystem.getLocal(conf).getRaw(); + } catch (IOException e) { + throw new RuntimeException(e); + } this.numPartitions = numOutputs; // k/v serialization keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf); valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf); serializationFactory = new SerializationFactory(this.conf); - keySerializer = serializationFactory.getSerializer(keyClass); - valSerializer = serializationFactory.getSerializer(valClass); + keySerialization = serializationFactory.getSerialization(keyClass); + valSerialization = serializationFactory.getSerialization(valClass); + keySerializer = keySerialization.getSerializer(keyClass); + valSerializer = valSerialization.getSerializer(valClass); outputRecordBytesCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES); outputRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS); @@ -122,16 +139,15 @@ public BaseUnorderedPartitionedKVWriter(OutputContext outputContext, Configurati additionalSpillBytesWritternCounter = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN); additionalSpillBytesReadCounter = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ); numAdditionalSpillsCounter = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT); - + dataViaEventSize = outputContext.getCounters().findCounter(TaskCounter.DATA_BYTES_VIA_EVENT); + // compression - if 
(ConfigUtils.shouldCompressIntermediateOutput(this.conf)) { - Class codecClass = - ConfigUtils.getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class); - codec = ReflectionUtils.newInstance(codecClass, this.conf); - } else { - codec = null; + try { + this.codec = CodecUtils.getCodec(conf); + } catch (IOException e) { + throw new RuntimeException(e); } - + this.ifileReadAhead = this.conf.getBoolean( TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java index 6ea0385000..dbd4794b47 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/writers/UnorderedPartitionedKVWriter.java @@ -48,8 +48,10 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.tez.common.CallableWithNdc; +import org.apache.tez.common.GuavaShim; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezUtilsInternal; import org.apache.tez.common.counters.TaskCounter; @@ -69,12 +71,13 @@ import org.apache.tez.runtime.library.common.sort.impl.IFile.Writer; import org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; +import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads; import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; @@ -83,6 +86,8 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.protobuf.ByteString; +import static org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord.ensureSpillFilePermissions; + public class UnorderedPartitionedKVWriter extends BaseUnorderedPartitionedKVWriter { private static final Logger LOG = LoggerFactory.getLogger(UnorderedPartitionedKVWriter.class); @@ -99,7 +104,7 @@ public class UnorderedPartitionedKVWriter extends BaseUnorderedPartitionedKVWrit // Maybe setup a separate statistics class which can be shared between the // buffer and the main path instead of having multiple arrays. 
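The BaseUnorderedPartitionedKVWriter constructor above swaps its inline codec reflection for a single CodecUtils.getCodec(conf) call. That helper's body is not part of this excerpt; a minimal sketch, assuming it simply centralizes the logic it replaces:

```java
// Hedged sketch of what CodecUtils.getCodec(conf) plausibly centralizes.
// This mirrors the inline logic removed above; treat it as an assumption
// about the helper, not its actual body.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.tez.runtime.library.common.ConfigUtils;

public final class CodecUtilsSketch {
  private CodecUtilsSketch() {
  }

  public static CompressionCodec getCodec(Configuration conf) throws IOException {
    if (!ConfigUtils.shouldCompressIntermediateOutput(conf)) {
      return null; // callers treat a null codec as "no compression"
    }
    Class<? extends CompressionCodec> codecClass =
        ConfigUtils.getIntermediateOutputCompressorClass(conf, DefaultCodec.class);
    return ReflectionUtils.newInstance(codecClass, conf);
  }
}
```

Centralizing this means every writer and reader resolves the codec the same way, and any codec-related fix lands in one place instead of each call site.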
- private final String destNameTrimmed; + private final String sourceDestNameTrimmed; private final long availableMemory; @VisibleForTesting final WrappedBuffer[] buffers; @@ -111,8 +116,8 @@ public class UnorderedPartitionedKVWriter extends BaseUnorderedPartitionedKVWrit WrappedBuffer currentBuffer; private final FileSystem rfs; - private final List spillInfoList = Collections - .synchronizedList(new ArrayList()); + @VisibleForTesting + final List spillInfoList = Collections.synchronizedList(new ArrayList()); private final ListeningExecutorService spillExecutor; @@ -125,6 +130,8 @@ public class UnorderedPartitionedKVWriter extends BaseUnorderedPartitionedKVWrit // uncompressed size for each partition private final long[] sizePerPartition; private volatile long spilledSize = 0; + private boolean dataViaEventsEnabled; + private int dataViaEventsMaxSize; static final ThreadLocal deflater = new ThreadLocal() { @@ -171,7 +178,8 @@ public Deflater get() { Path finalOutPath; //for single partition cases (e.g UnorderedKVOutput) - private final IFile.Writer writer; + @VisibleForTesting + final IFile.Writer writer; @VisibleForTesting final boolean skipBuffers; @@ -189,13 +197,17 @@ public Deflater get() { private List filledBuffers = new ArrayList<>(); + // When enabled, uses in-mem ifile writer + private final boolean useCachedStream; + public UnorderedPartitionedKVWriter(OutputContext outputContext, Configuration conf, int numOutputs, long availableMemoryBytes) throws IOException { super(outputContext, conf, numOutputs); Preconditions.checkArgument(availableMemoryBytes >= 0, "availableMemory should be >= 0 bytes"); - this.destNameTrimmed = TezUtilsInternal.cleanVertexName(outputContext.getDestinationVertexName()); + this.sourceDestNameTrimmed = TezUtilsInternal.cleanVertexName(outputContext.getTaskVertexName()) + " -> " + + TezUtilsInternal.cleanVertexName(outputContext.getDestinationVertexName()); //Not checking for TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT as it might not add much value in // this case. Add it later if needed. 
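The useCachedStream field introduced above selects IFile.FileBackedInMemIFileWriter (see the constructor hunk that follows): an IFile writer that buffers its output in memory up to dataViaEventsMaxSize and transparently falls back to a real file beyond that. The actual class is not shown in this excerpt; below is a self-contained sketch of the underlying pattern, with a hypothetical spillPath, not Tez's implementation:

```java
// Illustrative stand-in for the "file-backed in-memory writer" pattern.
// Data stays in a heap buffer until a cap is hit, then spills to a file.
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

public class FileBackedBuffer extends OutputStream {
  private final int maxInMemBytes;
  private final String spillPath; // hypothetical fallback location
  private ByteArrayOutputStream mem = new ByteArrayOutputStream();
  private OutputStream file; // non-null once data has spilled to disk

  public FileBackedBuffer(int maxInMemBytes, String spillPath) {
    this.maxInMemBytes = maxInMemBytes;
    this.spillPath = spillPath;
  }

  /** Mirrors isDataFlushedToDisk() in the patch: has the cap been exceeded? */
  public boolean isDataFlushedToDisk() {
    return file != null;
  }

  /** Valid only while the data is still in memory. */
  public byte[] getData() {
    return mem.toByteArray();
  }

  @Override
  public void write(int b) throws IOException {
    if (file == null && mem.size() + 1 > maxInMemBytes) {
      spillToDisk();
    }
    (file == null ? mem : file).write(b);
  }

  private void spillToDisk() throws IOException {
    file = new FileOutputStream(spillPath);
    mem.writeTo(file); // replay the buffered prefix, then keep writing on disk
    mem = new ByteArrayOutputStream(0); // release the heap buffer
  }

  @Override
  public void close() throws IOException {
    if (file != null) {
      file.close();
    }
  }
}
```

After close(), the caller checks isDataFlushedToDisk() to decide between shipping the bytes inline and falling back to the normal file-based shuffle, which is exactly the branch canSendDataOverDME()/readDataForDME() take later in this file.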
boolean pipelinedShuffleConf = this.conf.getBoolean(TezRuntimeConfiguration @@ -207,6 +219,22 @@ public UnorderedPartitionedKVWriter(OutputContext outputContext, Configuration c this.pipelinedShuffle = pipelinedShuffleConf && !isFinalMergeEnabled; this.finalEvents = Lists.newLinkedList(); + this.dataViaEventsEnabled = conf.getBoolean( + TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED, + TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED_DEFAULT); + + // No max cap on size (intentional) + this.dataViaEventsMaxSize = conf.getInt( + TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE, + TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE_DEFAULT); + + boolean useCachedStreamConfig = conf.getBoolean( + TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_SUPPORT_IN_MEM_FILE, + TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_SUPPORT_IN_MEM_FILE_DEFAULT); + + this.useCachedStream = useCachedStreamConfig && (this.dataViaEventsEnabled && (numPartitions == 1) + && !pipelinedShuffle); + if (availableMemoryBytes == 0) { Preconditions.checkArgument(((numPartitions == 1) && !pipelinedShuffle), "availableMemory " + "can be set to 0 only when numPartitions=1 and " + TezRuntimeConfiguration @@ -230,7 +258,7 @@ public UnorderedPartitionedKVWriter(OutputContext outputContext, Configuration c buffers[0] = new WrappedBuffer(numOutputs, sizePerBuffer); numInitializedBuffers = 1; if (LOG.isDebugEnabled()) { - LOG.debug(destNameTrimmed + ": " + "Initializing Buffer #" + + LOG.debug(sourceDestNameTrimmed + ": " + "Initializing Buffer #" + numInitializedBuffers + " with size=" + sizePerBuffer); } currentBuffer = buffers[0]; @@ -267,22 +295,26 @@ public UnorderedPartitionedKVWriter(OutputContext outputContext, Configuration c outputLargeRecordsCounter = outputContext.getCounters().findCounter( TaskCounter.OUTPUT_LARGE_RECORDS); - - indexFileSizeEstimate = numPartitions * Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH; if (numPartitions == 1 && !pipelinedShuffle) { //special case, where in only one partition is available. 
- finalOutPath = outputFileHandler.getOutputFileForWrite(); - finalIndexPath = outputFileHandler.getOutputIndexFileForWrite(indexFileSizeEstimate); skipBuffers = true; - writer = new IFile.Writer(conf, rfs, finalOutPath, keyClass, valClass, - codec, outputRecordsCounter, outputRecordBytesCounter); + if (this.useCachedStream) { + writer = new IFile.FileBackedInMemIFileWriter(keySerialization, valSerialization, rfs, + outputFileHandler, keyClass, valClass, codec, outputRecordsCounter, + outputRecordBytesCounter, dataViaEventsMaxSize); + } else { + finalOutPath = outputFileHandler.getOutputFileForWrite(); + writer = new IFile.Writer(keySerialization, valSerialization, rfs, finalOutPath, keyClass, valClass, + codec, outputRecordsCounter, outputRecordBytesCounter); + ensureSpillFilePermissions(finalOutPath, rfs); + } } else { skipBuffers = false; writer = null; } - LOG.info(destNameTrimmed + ": " + LOG.info(sourceDestNameTrimmed + ": " + "numBuffers=" + numBuffers + ", sizePerBuffer=" + sizePerBuffer + ", skipBuffers=" + skipBuffers @@ -292,7 +324,12 @@ public UnorderedPartitionedKVWriter(OutputContext outputContext, Configuration c + ", pipelinedShuffle=" + pipelinedShuffle + ", isFinalMergeEnabled=" + isFinalMergeEnabled + ", numPartitions=" + numPartitions - + ", reportPartitionStats=" + reportPartitionStats); + + ", reportPartitionStats=" + reportPartitionStats + + ", dataViaEventsEnabled=" + dataViaEventsEnabled + + ", dataViaEventsMaxSize=" + dataViaEventsMaxSize + + ", useCachedStreamConfig=" + useCachedStreamConfig + + ", useCachedStream=" + useCachedStream + ); } private static final int ALLOC_OVERHEAD = 64; @@ -457,7 +494,7 @@ private void setupNextBuffer() throws IOException { // Update overall stats final int filledBufferCount = filledBuffers.size(); if (LOG.isDebugEnabled() || (filledBufferCount % 10) == 0) { - LOG.info(destNameTrimmed + ": " + "Moving to next buffer. Total filled buffers: " + filledBufferCount); + LOG.info(sourceDestNameTrimmed + ": " + "Moving to next buffer. Total filled buffers: " + filledBufferCount); } updateGlobalStats(currentBuffer); @@ -495,7 +532,7 @@ private boolean scheduleSpill(boolean block) throws IOException { final int filledBufferCount = filledBuffers.size(); if (LOG.isDebugEnabled() || (filledBufferCount % 10) == 0) { - LOG.info(destNameTrimmed + ": triggering spill. filledBuffers.size=" + filledBufferCount); + LOG.info(sourceDestNameTrimmed + ": triggering spill. 
filledBuffers.size=" + filledBufferCount); } pendingSpillCount.incrementAndGet(); int spillNumber = numSpills.getAndIncrement(); @@ -504,7 +541,7 @@ private boolean scheduleSpill(boolean block) throws IOException { new ArrayList(filledBuffers), codec, spilledRecordsCounter, spillNumber)); filledBuffers.clear(); - Futures.addCallback(future, new SpillCallback(spillNumber)); + Futures.addCallback(future, new SpillCallback(spillNumber), GuavaShim.directExecutor()); // Update once per buffer (instead of every record) updateTezCountersAndNotify(); return true; @@ -587,7 +624,9 @@ protected SpillResult callInternal() throws IOException { this.spillPathDetails = getSpillPathDetails(false, -1, spillNumber); this.spillIndex = spillPathDetails.spillIndex; } + LOG.info("Writing spill " + spillNumber + " to " + spillPathDetails.outputFilePath.toString()); FSDataOutputStream out = rfs.create(spillPathDetails.outputFilePath); + ensureSpillFilePermissions(spillPathDetails.outputFilePath, rfs); TezSpillRecord spillRecord = new TezSpillRecord(numPartitions); DataInputBuffer key = new DataInputBuffer(); DataInputBuffer val = new DataInputBuffer(); @@ -604,7 +643,7 @@ protected SpillResult callInternal() throws IOException { continue; } if (writer == null) { - writer = new Writer(conf, out, keyClass, valClass, codec, null, null); + writer = new Writer(keySerialization, valSerialization, out, keyClass, valClass, codec, null, null); } numRecords += writePartition(buffer.partitionPositions[i], buffer, writer, key, val); } @@ -635,10 +674,10 @@ protected SpillResult callInternal() throws IOException { spillResult = new SpillResult(compressedLength, this.filledBuffers); handleSpillIndex(spillPathDetails, spillRecord); - LOG.info(destNameTrimmed + ": " + "Finished spill " + spillIndex); + LOG.info(sourceDestNameTrimmed + ": " + "Finished spill " + spillIndex); if (LOG.isDebugEnabled()) { - LOG.debug(destNameTrimmed + ": " + "Spill=" + spillIndex + ", indexPath=" + LOG.debug(sourceDestNameTrimmed + ": " + "Spill=" + spillIndex + ", indexPath=" + spillPathDetails.indexFilePath + ", outputPath=" + spillPathDetails.outputFilePath); } return spillResult; @@ -663,7 +702,7 @@ private long writePartition(int pos, WrappedBuffer wrappedBuffer, Writer writer, } public static long getInitialMemoryRequirement(Configuration conf, long maxAvailableTaskMemory) { - int initialMemRequestMb = conf.getInt( + long initialMemRequestMb = conf.getInt( TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB, TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB_DEFAULT); Preconditions.checkArgument(initialMemRequestMb != 0, @@ -674,6 +713,40 @@ public static long getInitialMemoryRequirement(Configuration conf, long maxAvail return reqBytes; } + private boolean canSendDataOverDME() throws IOException { + if (dataViaEventsEnabled + && this.useCachedStream + && this.finalOutPath == null) { + + // It is possible that in-mem writer spilled over to disk. Need to use + // that path as finalOutPath and set its permission. 
+ + if (((IFile.FileBackedInMemIFileWriter) writer).isDataFlushedToDisk()) { + this.finalOutPath = + ((IFile.FileBackedInMemIFileWriter) writer).getOutputPath(); + ensureSpillFilePermissions(finalOutPath, rfs); + additionalSpillBytesWritternCounter.increment(writer.getCompressedLength()); + } + } + + return (writer != null) && (dataViaEventsEnabled) + && (writer.getCompressedLength() <= dataViaEventsMaxSize); + } + + private ByteBuffer readDataForDME() throws IOException { + if (this.useCachedStream + && !((IFile.FileBackedInMemIFileWriter) writer).isDataFlushedToDisk()) { + return ((IFile.FileBackedInMemIFileWriter) writer).getData(); + } else { + try (FSDataInputStream inStream = rfs.open(finalOutPath)) { + byte[] buf = new byte[(int) writer.getCompressedLength()]; + IOUtils.readFully(inStream, buf, 0, (int) writer.getCompressedLength()); + additionalSpillBytesReadCounter.increment(writer.getCompressedLength()); + return ByteBuffer.wrap(buf); + } + } + } + @Override public List close() throws IOException, InterruptedException { // In case there are buffers to be spilled, schedule spilling @@ -682,7 +755,8 @@ public List close() throws IOException, InterruptedException { isShutdown.set(true); spillLock.lock(); try { - LOG.info(destNameTrimmed + ": " + "Waiting for all spills to complete : Pending : " + pendingSpillCount.get()); + LOG.info( + sourceDestNameTrimmed + ": " + "Waiting for all spills to complete : Pending : " + pendingSpillCount.get()); while (pendingSpillCount.get() != 0 && spillException == null) { spillInProgress.await(); } @@ -690,7 +764,7 @@ public List close() throws IOException, InterruptedException { spillLock.unlock(); } if (spillException != null) { - LOG.error(destNameTrimmed + ": " + "Error during spill, throwing"); + LOG.error(sourceDestNameTrimmed + ": " + "Error during spill, throwing"); // Assuming close will be called on the same thread as the write cleanup(); currentBuffer.cleanup(); @@ -701,7 +775,7 @@ public List close() throws IOException, InterruptedException { throw new IOException(spillException); } } else { - LOG.info(destNameTrimmed + ": " + "All spills complete"); + LOG.info(sourceDestNameTrimmed + ": " + "All spills complete"); // Assuming close will be called on the same thread as the write cleanup(); @@ -711,10 +785,6 @@ public List close() throws IOException, InterruptedException { writer.close(); long rawLen = writer.getRawLength(); long compLen = writer.getCompressedLength(); - TezIndexRecord rec = new TezIndexRecord(0, rawLen, compLen); - TezSpillRecord sr = new TezSpillRecord(1); - sr.putIndex(rec, 0); - sr.writeToFile(finalIndexPath, conf); BitSet emptyPartitions = new BitSet(); if (outputRecordsCounter.getValue() == 0) { @@ -732,8 +802,17 @@ public List close() throws IOException, InterruptedException { fileOutputBytesCounter.increment(compLen + indexFileSizeEstimate); } eventList.add(generateVMEvent()); + + if (!canSendDataOverDME()) { + TezIndexRecord rec = new TezIndexRecord(0, rawLen, compLen); + TezSpillRecord sr = new TezSpillRecord(1); + sr.putIndex(rec, 0); + finalIndexPath = outputFileHandler.getOutputIndexFileForWrite(indexFileSizeEstimate); + sr.writeToFile(finalIndexPath, conf, localFs); + } eventList.add(generateDMEvent(false, -1, false, outputContext - .getUniqueIdentifier(), emptyPartitions)); + .getUniqueIdentifier(), emptyPartitions)); + return eventList; } @@ -824,6 +903,9 @@ private Event generateDMEvent(boolean addSpillDetails, int spillId, outputContext.notifyProgress(); DataMovementEventPayloadProto.Builder 
payloadBuilder = DataMovementEventPayloadProto .newBuilder(); + if (numPartitions == 1) { + payloadBuilder.setNumRecord((int) outputRecordsCounter.getValue()); + } String host = getHost(); if (emptyPartitions.cardinality() != 0) { @@ -846,6 +928,18 @@ private Event generateDMEvent(boolean addSpillDetails, int spillId, payloadBuilder.setLastEvent(isLastSpill); } + if (canSendDataOverDME()) { + ShuffleUserPayloads.DataProto.Builder dataProtoBuilder = ShuffleUserPayloads.DataProto.newBuilder(); + dataProtoBuilder.setData(ByteString.copyFrom(readDataForDME())); + dataProtoBuilder.setRawLength((int) this.writer.getRawLength()); + + dataProtoBuilder.setCompressedLength((int) this.writer.getCompressedLength()); + payloadBuilder.setData(dataProtoBuilder.build()); + + this.dataViaEventSize.increment(this.writer.getCompressedLength()); + LOG.debug("payload packed in DME, dataSize: " + this.writer.getCompressedLength()); + } + ByteBuffer payload = payloadBuilder.build().toByteString().asReadOnlyByteBuffer(); return CompositeDataMovementEvent.create(0, numPartitions, payload); } @@ -984,15 +1078,17 @@ private void mergeAll() throws IOException { FSDataOutputStream out = null; try { out = rfs.create(finalOutPath); + ensureSpillFilePermissions(finalOutPath, rfs); Writer writer = null; for (int i = 0; i < numPartitions; i++) { long segmentStart = out.getPos(); if (numRecordsPerPartition[i] == 0) { - LOG.info(destNameTrimmed + ": " + "Skipping partition: " + i + " in final merge since it has no records"); + LOG.info( + sourceDestNameTrimmed + ": " + "Skipping partition: " + i + " in final merge since it has no records"); continue; } - writer = new Writer(conf, out, keyClass, valClass, codec, null, null); + writer = new Writer(keySerialization, valSerialization, out, keyClass, valClass, codec, null, null); try { if (currentBuffer.nextPosition != 0 && currentBuffer.partitionPositions[i] != WrappedBuffer.PARTITION_ABSENT_POSITION) { @@ -1014,7 +1110,7 @@ private void mergeAll() throws IOException { ifileBufferSize); while (reader.nextRawKey(keyBufferIFile)) { // TODO Inefficient. If spills are not compressed, a direct copy should be possible - // given the current IFile format. Also exteremely inefficient for large records, + // given the current IFile format. Also extremely inefficient for large records, // since the entire record will be read into memory. 
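For reference, the data-via-events path packed into the payload above is driven by three switches read in the constructor earlier. A hedged usage example (the constant names come from this patch; the import path is Tez's usual location for TezRuntimeConfiguration, and 512 bytes is just an illustrative threshold):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;

public class DataViaEventsConfigExample {
  public static Configuration configure() {
    Configuration conf = new Configuration();
    // Master switch for shipping small outputs inside the DataMovementEvent.
    conf.setBoolean(
        TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED, true);
    // Outputs larger than this many bytes still go through a spill file.
    conf.setInt(
        TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE, 512);
    // Let the writer buffer the (single) partition in memory before deciding.
    conf.setBoolean(
        TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_SUPPORT_IN_MEM_FILE,
        true);
    return conf;
  }
}
```

Even with all three enabled, the in-memory writer is only used when numPartitions == 1 and pipelined shuffle is off, per the useCachedStream gate above.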
reader.nextRawValue(valBufferIFile); writer.append(keyBufferIFile, valBufferIFile); @@ -1039,10 +1135,24 @@ private void mergeAll() throws IOException { if (out != null) { out.close(); } + deleteIntermediateSpills(); } - finalSpillRecord.writeToFile(finalIndexPath, conf); + finalSpillRecord.writeToFile(finalIndexPath, conf, localFs); fileOutputBytesCounter.increment(indexFileSizeEstimate); - LOG.info(destNameTrimmed + ": " + "Finished final spill after merging : " + numSpills.get() + " spills"); + LOG.info(sourceDestNameTrimmed + ": " + "Finished final spill after merging : " + numSpills.get() + " spills"); + } + + private void deleteIntermediateSpills() { + // Delete the intermediate spill files + synchronized (spillInfoList) { + for (SpillInfo spill : spillInfoList) { + try { + rfs.delete(spill.outPath, false); + } catch (IOException e) { + LOG.warn("Unable to delete intermediate spill " + spill.outPath, e); + } + } + } } private void writeLargeRecord(final Object key, final Object value, final int partition) @@ -1058,6 +1168,7 @@ private void writeLargeRecord(final Object key, final Object value, final int pa final TezSpillRecord spillRecord = new TezSpillRecord(numPartitions); final Path outPath = spillPathDetails.outputFilePath; out = rfs.create(outPath); + ensureSpillFilePermissions(outPath, rfs); BitSet emptyPartitions = null; if (pipelinedShuffle || !isFinalMergeEnabled) { emptyPartitions = new BitSet(numPartitions); @@ -1068,7 +1179,7 @@ private void writeLargeRecord(final Object key, final Object value, final int pa spilledRecordsCounter.increment(1); Writer writer = null; try { - writer = new IFile.Writer(conf, out, keyClass, valClass, codec, null, null); + writer = new IFile.Writer(keySerialization, valSerialization, out, keyClass, valClass, codec, null, null); writer.append(key, value); outputLargeRecordsCounter.increment(1); numRecordsPerPartition[i]++; @@ -1100,9 +1211,10 @@ private void writeLargeRecord(final Object key, final Object value, final int pa mayBeSendEventsForSpill(emptyPartitions, sizePerPartition, spillIndex, false); - LOG.info(destNameTrimmed + ": " + "Finished writing large record of size " + outSize + " to spill file " + spillIndex); + LOG.info(sourceDestNameTrimmed + ": " + "Finished writing large record of size " + outSize + " to spill file " + + spillIndex); if (LOG.isDebugEnabled()) { - LOG.debug(destNameTrimmed + ": " + "LargeRecord Spill=" + spillIndex + ", indexPath=" + LOG.debug(sourceDestNameTrimmed + ": " + "LargeRecord Spill=" + spillIndex + ", indexPath=" + spillPathDetails.indexFilePath + ", outputPath=" + spillPathDetails.outputFilePath); } @@ -1117,7 +1229,7 @@ private void handleSpillIndex(SpillPathDetails spillPathDetails, TezSpillRecord throws IOException { if (spillPathDetails.indexFilePath != null) { //write the index record - spillRecord.writeToFile(spillPathDetails.indexFilePath, conf); + spillRecord.writeToFile(spillPathDetails.indexFilePath, conf, localFs); } else { //add to cache SpillInfo spillInfo = new SpillInfo(spillRecord, spillPathDetails.outputFilePath); @@ -1238,7 +1350,7 @@ private void mayBeSendEventsForSpill( try { events = generateEventForSpill(emptyPartitions, sizePerPartition, spillNumber, isFinalUpdate); - LOG.info(destNameTrimmed + ": " + "Adding spill event for spill" + LOG.info(sourceDestNameTrimmed + ": " + "Adding spill event for spill" + " (final update=" + isFinalUpdate + "), spillId=" + spillNumber); if (pipelinedShuffle) { //Send out an event for consuming. 
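Earlier in this file's diff, Futures.addCallback(future, callback) gains a third argument, GuavaShim.directExecutor(). Recent Guava versions dropped the two-argument overload, so callers must name an Executor; a direct (same-thread) executor preserves the old semantics. GuavaShim's body is not in this excerpt; a plausible minimal sketch:

```java
// Hedged sketch of a GuavaShim-style indirection: a same-thread Executor
// to pass to Futures.addCallback, reproducing what the removed two-argument
// overload did implicitly.
import java.util.concurrent.Executor;

public final class GuavaShimSketch {
  private GuavaShimSketch() {
  }

  public static Executor directExecutor() {
    // Runs each command on the calling thread; for future callbacks this is
    // typically the thread that completed the future.
    return new Executor() {
      @Override
      public void execute(Runnable command) {
        command.run();
      }
    };
  }
}
```

Naming the executor explicitly also leaves room to move callback work off the completing thread later; the direct executor simply keeps the original behavior.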
@@ -1247,7 +1359,7 @@ private void mayBeSendEventsForSpill( this.finalEvents.addAll(events); } } catch (IOException e) { - LOG.error(destNameTrimmed + ": " + "Error in sending pipelined events", e); + LOG.error(sourceDestNameTrimmed + ": " + "Error in sending pipelined events", e); outputContext.reportFailure(TaskFailureType.NON_FATAL, e, "Error in sending events."); } @@ -1306,7 +1418,7 @@ public void onSuccess(SpillResult result) { availableBuffers.add(buffer); } } catch (Throwable e) { - LOG.error(destNameTrimmed + ": Failure while attempting to reset buffer after spill", e); + LOG.error(sourceDestNameTrimmed + ": Failure while attempting to reset buffer after spill", e); outputContext.reportFailure(TaskFailureType.NON_FATAL, e, "Failure while attempting to reset buffer after spill"); } @@ -1336,7 +1448,7 @@ public void onSuccess(SpillResult result) { public void onFailure(Throwable t) { // spillException setup to throw an exception back to the user. Requires synchronization. // Consider removing it in favor of having Tez kill the task - LOG.error(destNameTrimmed + ": " + "Failure while spilling to disk", t); + LOG.error(sourceDestNameTrimmed + ": " + "Failure while spilling to disk", t); spillException = t; outputContext.reportFailure(TaskFailureType.NON_FATAL, t, "Failure while spilling to disk"); spillLock.lock(); @@ -1359,7 +1471,8 @@ private static class SpillResult { } } - private static class SpillInfo { + @VisibleForTesting + static class SpillInfo { final TezSpillRecord spillRecord; final Path outPath; diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedGroupedKVInputConfig.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedGroupedKVInputConfig.java index 11a8d6fd9e..d9c1d21403 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedGroupedKVInputConfig.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedGroupedKVInputConfig.java @@ -24,9 +24,9 @@ import java.io.IOException; import java.util.Map; +import java.util.Objects; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; @@ -299,8 +299,8 @@ public static class Builder implements SpecificConfigBuilder { @InterfaceAudience.Private Builder(String keyClassName, String valueClassName) { this(); - Preconditions.checkNotNull(keyClassName, "Key class name cannot be null"); - Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); + Objects.requireNonNull(keyClassName, "Key class name cannot be null"); + Objects.requireNonNull(valueClassName, "Value class name cannot be null"); setKeyClassName(keyClassName); setValueClassName(valueClassName); } @@ -316,14 +316,14 @@ public static class Builder implements SpecificConfigBuilder { @InterfaceAudience.Private Builder setKeyClassName(String keyClassName) { - Preconditions.checkNotNull(keyClassName, "Key class name cannot be null"); + Objects.requireNonNull(keyClassName, "Key class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, keyClassName); return this; } @InterfaceAudience.Private Builder setValueClassName(String valueClassName) { - Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); + Objects.requireNonNull(valueClassName, "Value class name cannot be null"); 
this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, valueClassName); return this; } @@ -404,7 +404,7 @@ public Builder setKeyComparatorClass(String comparatorClassName) { */ public Builder setKeyComparatorClass(String comparatorClassName, @Nullable Map comparatorConf) { - Preconditions.checkNotNull(comparatorClassName, "Comparator class name cannot be null"); + Objects.requireNonNull(comparatorClassName, "Comparator class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS, comparatorClassName); if (comparatorConf != null) { @@ -418,7 +418,7 @@ public Builder setKeyComparatorClass(String comparatorClassName, @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(String key, String value) { - Preconditions.checkNotNull(key, "Key cannot be null"); + Objects.requireNonNull(key, "Key cannot be null"); if (ConfigUtils.doesKeyQualify(key, Lists.newArrayList(OrderedGroupedKVInput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), @@ -435,7 +435,7 @@ public Builder setAdditionalConfiguration(String key, String value) { @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(Map confMap) { - Preconditions.checkNotNull(confMap, "ConfMap cannot be null"); + Objects.requireNonNull(confMap, "ConfMap cannot be null"); Map map = ConfigUtils.extractConfigurationMap(confMap, Lists.newArrayList(OrderedGroupedKVInput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); @@ -447,7 +447,7 @@ public Builder setAdditionalConfiguration(Map confMap) { @Override public Builder setFromConfiguration(Configuration conf) { // Maybe ensure this is the first call ? Otherwise this can end up overriding other parameters - Preconditions.checkArgument(conf != null, "Configuration cannot be null"); + Objects.requireNonNull(conf, "Configuration cannot be null"); Map map = ConfigUtils.extractConfigurationMap(conf, Lists.newArrayList(OrderedGroupedKVInput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); @@ -459,7 +459,7 @@ public Builder setFromConfiguration(Configuration conf) { @Override public Builder setFromConfigurationUnfiltered(Configuration conf) { // Maybe ensure this is the first call ? 
Otherwise this can end up overriding other parameters - Preconditions.checkArgument(conf != null, "Configuration cannot be null"); + Objects.requireNonNull(conf, "Configuration cannot be null"); ConfigUtils.mergeConfs(this.conf, conf); return this; } @@ -493,10 +493,8 @@ public Builder setCompression(boolean enabled, @Nullable String compressionCodec */ public Builder setKeySerializationClass(String serializationClassName, String comparatorClassName, @Nullable Map serializerConf) { - Preconditions.checkArgument(serializationClassName != null, - "serializationClassName cannot be null"); - Preconditions.checkArgument(comparatorClassName != null, - "comparator cannot be null"); + Objects.requireNonNull(serializationClassName, "serializationClassName cannot be null"); + Objects.requireNonNull(comparatorClassName, "comparator cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); setKeyComparatorClass(comparatorClassName, null); @@ -519,8 +517,7 @@ public Builder setKeySerializationClass(String serializationClassName, */ public Builder setValueSerializationClass(String serializationClassName, @Nullable Map serializerConf) { - Preconditions.checkArgument(serializationClassName != null, - "serializationClassName cannot be null"); + Objects.requireNonNull(serializationClassName, "serializationClassName cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); if (serializerConf != null) { diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedPartitionedKVEdgeConfig.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedPartitionedKVEdgeConfig.java index 350420390a..e4e9be2de0 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedPartitionedKVEdgeConfig.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedPartitionedKVEdgeConfig.java @@ -21,8 +21,7 @@ import javax.annotation.Nullable; import java.util.Map; - -import com.google.common.base.Preconditions; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -145,7 +144,7 @@ public EdgeProperty createDefaultEdgeProperty() { * @return an {@link org.apache.tez.dag.api.EdgeProperty} instance */ public EdgeProperty createDefaultCustomEdgeProperty(EdgeManagerPluginDescriptor edgeManagerDescriptor) { - Preconditions.checkNotNull(edgeManagerDescriptor, "EdgeManagerDescriptor cannot be null"); + Objects.requireNonNull(edgeManagerDescriptor, "EdgeManagerDescriptor cannot be null"); EdgeProperty edgeProperty = EdgeProperty.create(edgeManagerDescriptor, EdgeProperty.DataSourceType.PERSISTED, EdgeProperty.SchedulingType.SEQUENTIAL, diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedPartitionedKVOutputConfig.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedPartitionedKVOutputConfig.java index 0f37c66616..9671feb8b9 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedPartitionedKVOutputConfig.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/OrderedPartitionedKVOutputConfig.java @@ -24,9 +24,9 @@ import java.io.IOException; import java.util.Map; +import java.util.Objects; import 
com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; @@ -261,9 +261,9 @@ public static class Builder implements SpecificConfigBuilder { Builder(String keyClassName, String valueClassName, String partitionerClassName, @Nullable Map partitionerConf) { this(); - Preconditions.checkNotNull(keyClassName, "Key class name cannot be null"); - Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); - Preconditions.checkNotNull(partitionerClassName, "Partitioner class name cannot be null"); + Objects.requireNonNull(keyClassName, "Key class name cannot be null"); + Objects.requireNonNull(valueClassName, "Value class name cannot be null"); + Objects.requireNonNull(partitionerClassName, "Partitioner class name cannot be null"); setKeyClassName(keyClassName); setValueClassName(valueClassName); setPartitioner(partitionerClassName, partitionerConf); @@ -280,21 +280,21 @@ public static class Builder implements SpecificConfigBuilder { @InterfaceAudience.Private Builder setKeyClassName(String keyClassName) { - Preconditions.checkNotNull(keyClassName, "Key class name cannot be null"); + Objects.requireNonNull(keyClassName, "Key class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, keyClassName); return this; } @InterfaceAudience.Private Builder setValueClassName(String valueClassName) { - Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); + Objects.requireNonNull(valueClassName, "Value class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, valueClassName); return this; } @InterfaceAudience.Private Builder setPartitioner(String partitionerClassName, @Nullable Map partitionerConf) { - Preconditions.checkNotNull(partitionerClassName, "Partitioner class name cannot be null"); + Objects.requireNonNull(partitionerClassName, "Partitioner class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, partitionerClassName); if (partitionerConf != null) { // Merging the confs for now. Change to be specific in the future. 
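The long run of builder hunks here (and in the preceding config classes) migrates null checks from Guava's Preconditions to java.util.Objects, dropping a Guava dependency from public builder code. For the checkNotNull call sites this is a drop-in swap; note, however, that the sites that previously used Preconditions.checkArgument(conf != null, ...) threw IllegalArgumentException and now throw NullPointerException. A minimal illustration:

```java
import java.util.Objects;

public class NullCheckExample {
  static void set(String keyClassName) {
    // Equivalent to Guava's Preconditions.checkNotNull(arg, msg): throws
    // NullPointerException with the message if the argument is null, and
    // returns the argument otherwise, so it can be used inline.
    String checked = Objects.requireNonNull(keyClassName,
        "Key class name cannot be null");
    System.out.println("key class = " + checked);
  }

  public static void main(String[] args) {
    set("org.apache.hadoop.io.Text"); // fine
    try {
      set(null); // throws NullPointerException with the given message
    } catch (NullPointerException expected) {
      System.out.println("caught: " + expected.getMessage());
    }
  }
}
```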
@@ -334,7 +334,7 @@ public Builder setSorterNumThreads(int numThreads) { @Override public Builder setSorter(SorterImpl sorterImpl) { - Preconditions.checkNotNull(sorterImpl, "Sorter cannot be null"); + Objects.requireNonNull(sorterImpl, "Sorter cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_SORTER_CLASS, sorterImpl.name()); return this; @@ -344,7 +344,7 @@ public Builder setSorter(SorterImpl sorterImpl) { @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(String key, String value) { - Preconditions.checkNotNull(key, "Key cannot be null"); + Objects.requireNonNull(key, "Key cannot be null"); if (ConfigUtils.doesKeyQualify(key, Lists.newArrayList(OrderedPartitionedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), @@ -361,7 +361,7 @@ public Builder setAdditionalConfiguration(String key, String value) { @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(Map confMap) { - Preconditions.checkNotNull(confMap, "ConfMap cannot be null"); + Objects.requireNonNull(confMap, "ConfMap cannot be null"); Map map = ConfigUtils.extractConfigurationMap(confMap, Lists.newArrayList(OrderedPartitionedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); @@ -373,7 +373,7 @@ public Builder setAdditionalConfiguration(Map confMap) { @Override public Builder setFromConfiguration(Configuration conf) { // Maybe ensure this is the first call ? Otherwise this can end up overriding other parameters - Preconditions.checkArgument(conf != null, "Configuration cannot be null"); + Objects.requireNonNull(conf, "Configuration cannot be null"); Map map = ConfigUtils.extractConfigurationMap(conf, Lists.newArrayList(OrderedPartitionedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); @@ -385,7 +385,7 @@ public Builder setFromConfiguration(Configuration conf) { @Override public Builder setFromConfigurationUnfiltered(Configuration conf) { // Maybe ensure this is the first call ? Otherwise this can end up overriding other parameters - Preconditions.checkArgument(conf != null, "Configuration cannot be null"); + Objects.requireNonNull(conf, "Configuration cannot be null"); ConfigUtils.mergeConfs(this.conf, conf); return this; } @@ -411,10 +411,11 @@ public Builder setKeyComparatorClass(String comparatorClassName) { * java.util.Map} of key-value pairs. The keys should be limited to * the ones required by the comparator. * @return instance of the current builder + * @throws NullPointerException if {@code comparatorClassName} is {@code null} */ public Builder setKeyComparatorClass(String comparatorClassName, @Nullable Map comparatorConf) { - Preconditions.checkNotNull(comparatorClassName, "Comparator class name cannot be null"); + Objects.requireNonNull(comparatorClassName, "Comparator class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS, comparatorClassName); if (comparatorConf != null) { @@ -451,13 +452,13 @@ public Builder setCompression(boolean enabled, @Nullable String compressionCodec * {@link java.util.Map} of key-value pairs. The keys should be limited * to the ones required by the comparator. 
* @return this object for further chained method calls + * @throws NullPointerException if {@code serializationClassName} or + * {@code comparatorClassName} is {@code null} */ public Builder setKeySerializationClass(String serializationClassName, String comparatorClassName, @Nullable Map serializerConf) { - Preconditions.checkArgument(serializationClassName != null, - "serializationClassName cannot be null"); - Preconditions.checkArgument(comparatorClassName != null, - "comparator cannot be null"); + Objects.requireNonNull(serializationClassName, "serializationClassName cannot be null"); + Objects.requireNonNull(comparatorClassName, "comparator cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); setKeyComparatorClass(comparatorClassName, null); @@ -477,11 +478,11 @@ public Builder setKeySerializationClass(String serializationClassName, * {@link java.util.Map} of key-value pairs. The keys should be limited * to the ones required by the comparator. * @return this object for further chained method calls + * @throws NullPointerException if {@code serializationClassName} is {@code null} */ public Builder setValueSerializationClass(String serializationClassName, @Nullable Map serializerConf) { - Preconditions.checkArgument(serializationClassName != null, - "serializationClassName cannot be null"); + Objects.requireNonNull(serializationClassName, "serializationClassName cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); if (serializerConf != null) { diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVEdgeConfig.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVEdgeConfig.java index 25a48232b0..02e8a8950a 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVEdgeConfig.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVEdgeConfig.java @@ -23,8 +23,7 @@ import javax.annotation.Nullable; import java.util.Map; - -import com.google.common.base.Preconditions; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -147,7 +146,7 @@ public EdgeProperty createDefaultOneToOneEdgeProperty() { * @return an {@link org.apache.tez.dag.api.EdgeProperty} instance */ public EdgeProperty createDefaultCustomEdgeProperty(EdgeManagerPluginDescriptor edgeManagerDescriptor) { - Preconditions.checkNotNull(edgeManagerDescriptor, "EdgeManagerDescriptor cannot be null"); + Objects.requireNonNull(edgeManagerDescriptor, "EdgeManagerDescriptor cannot be null"); EdgeProperty edgeProperty = EdgeProperty.create(edgeManagerDescriptor, EdgeProperty.DataSourceType.PERSISTED, EdgeProperty.SchedulingType.SEQUENTIAL, @@ -277,7 +276,7 @@ public UnorderedKVInputConfig.SpecificBuilder configureInput() { /** * Build and return an instance of the configuration - * @return an instance of the acatual configuration + * @return an instance of the actual configuration */ public UnorderedKVEdgeConfig build() { return new UnorderedKVEdgeConfig(outputBuilder.build(), inputBuilder.build()); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVInputConfig.java 
b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVInputConfig.java index af7dbf6c22..6f285e7995 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVInputConfig.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVInputConfig.java @@ -24,9 +24,9 @@ import java.io.IOException; import java.util.Map; +import java.util.Objects; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; @@ -213,8 +213,8 @@ public static class Builder implements SpecificConfigBuilder { @InterfaceAudience.Private Builder(String keyClassName, String valueClassName) { this(); - Preconditions.checkNotNull(keyClassName, "Key class name cannot be null"); - Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); + Objects.requireNonNull(keyClassName, "Key class name cannot be null"); + Objects.requireNonNull(valueClassName, "Value class name cannot be null"); setKeyClassName(keyClassName); setValueClassName(valueClassName); } @@ -230,14 +230,14 @@ public static class Builder implements SpecificConfigBuilder { @InterfaceAudience.Private Builder setKeyClassName(String keyClassName) { - Preconditions.checkNotNull(keyClassName, "Key class name cannot be null"); + Objects.requireNonNull(keyClassName, "Key class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, keyClassName); return this; } @InterfaceAudience.Private Builder setValueClassName(String valueClassName) { - Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); + Objects.requireNonNull(valueClassName, "Value class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, valueClassName); return this; } @@ -265,7 +265,7 @@ public Builder setMergeFraction(float mergeFraction) { @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(String key, String value) { - Preconditions.checkNotNull(key, "Key cannot be null"); + Objects.requireNonNull(key, "Key cannot be null"); if (ConfigUtils.doesKeyQualify(key, Lists.newArrayList(UnorderedKVInput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), @@ -282,7 +282,7 @@ public Builder setAdditionalConfiguration(String key, String value) { @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(Map confMap) { - Preconditions.checkNotNull(confMap, "ConfMap cannot be null"); + Objects.requireNonNull(confMap, "ConfMap cannot be null"); Map map = ConfigUtils.extractConfigurationMap(confMap, Lists.newArrayList(UnorderedKVInput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); @@ -294,7 +294,7 @@ public Builder setAdditionalConfiguration(Map confMap) { @Override public Builder setFromConfiguration(Configuration conf) { // Maybe ensure this is the first call ? 
Otherwise this can end up overriding other parameters - Preconditions.checkArgument(conf != null, "Configuration cannot be null"); + Objects.requireNonNull(conf, "Configuration cannot be null"); Map map = ConfigUtils.extractConfigurationMap(conf, Lists.newArrayList(UnorderedKVInput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); @@ -306,7 +306,7 @@ public Builder setFromConfiguration(Configuration conf) { @Override public Builder setFromConfigurationUnfiltered(Configuration conf) { // Maybe ensure this is the first call ? Otherwise this can end up overriding other parameters - Preconditions.checkArgument(conf != null, "Configuration cannot be null"); + Objects.requireNonNull(conf, "Configuration cannot be null"); ConfigUtils.mergeConfs(this.conf, conf); return this; } @@ -338,8 +338,7 @@ public Builder setCompression(boolean enabled, @Nullable String compressionCodec */ public Builder setKeySerializationClass(String serializationClassName, @Nullable Map serializerConf) { - Preconditions.checkArgument(serializationClassName != null, - "serializationClassName cannot be null"); + Objects.requireNonNull(serializationClassName, "serializationClassName cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); if (serializerConf != null) { @@ -361,8 +360,7 @@ public Builder setKeySerializationClass(String serializationClassName, */ public Builder setValueSerializationClass(String serializationClassName, @Nullable Map serializerConf) { - Preconditions.checkArgument(serializationClassName != null, - "serializationClassName cannot be null"); + Objects.requireNonNull(serializationClassName, "serializationClassName cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); if (serializerConf != null) { diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVOutputConfig.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVOutputConfig.java index a9a15a0c15..a6ac7f2b5a 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVOutputConfig.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedKVOutputConfig.java @@ -24,9 +24,9 @@ import java.io.IOException; import java.util.Map; +import java.util.Objects; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; @@ -160,8 +160,8 @@ public static class Builder implements SpecificConfigBuilder { @InterfaceAudience.Private Builder(String keyClassName, String valueClassName) { this(); - Preconditions.checkNotNull(keyClassName, "Key class name cannot be null"); - Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); + Objects.requireNonNull(keyClassName, "Key class name cannot be null"); + Objects.requireNonNull(valueClassName, "Value class name cannot be null"); setKeyClassName(keyClassName); setValueClassName(valueClassName); } @@ -177,14 +177,14 @@ public static class Builder implements SpecificConfigBuilder { @InterfaceAudience.Private Builder setKeyClassName(String keyClassName) { - Preconditions.checkNotNull(keyClassName, "Key class name cannot 
be null"); + Objects.requireNonNull(keyClassName, "Key class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, keyClassName); return this; } @InterfaceAudience.Private Builder setValueClassName(String valueClassName) { - Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); + Objects.requireNonNull(valueClassName, "Value class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, valueClassName); return this; } @@ -192,7 +192,7 @@ Builder setValueClassName(String valueClassName) { @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(String key, String value) { - Preconditions.checkNotNull(key, "Key cannot be null"); + Objects.requireNonNull(key, "Key cannot be null"); if (ConfigUtils.doesKeyQualify(key, Lists.newArrayList(UnorderedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), @@ -209,7 +209,7 @@ public Builder setAdditionalConfiguration(String key, String value) { @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(Map confMap) { - Preconditions.checkNotNull(confMap, "ConfMap cannot be null"); + Objects.requireNonNull(confMap, "ConfMap cannot be null"); Map map = ConfigUtils.extractConfigurationMap(confMap, Lists.newArrayList(UnorderedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); @@ -221,7 +221,7 @@ public Builder setAdditionalConfiguration(Map confMap) { @Override public Builder setFromConfiguration(Configuration conf) { // Maybe ensure this is the first call ? Otherwise this can end up overriding other parameters - Preconditions.checkArgument(conf != null, "Configuration cannot be null"); + Objects.requireNonNull(conf, "Configuration cannot be null"); Map map = ConfigUtils.extractConfigurationMap(conf, Lists.newArrayList(UnorderedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); @@ -233,7 +233,7 @@ public Builder setFromConfiguration(Configuration conf) { @Override public Builder setFromConfigurationUnfiltered(Configuration conf) { // Maybe ensure this is the first call ? 
Otherwise this can end up overriding other parameters - Preconditions.checkArgument(conf != null, "Configuration cannot be null"); + Objects.requireNonNull(conf, "Configuration cannot be null"); ConfigUtils.mergeConfs(this.conf, conf); return this; } @@ -249,8 +249,7 @@ public Builder setFromConfigurationUnfiltered(Configuration conf) { */ public Builder setKeySerializationClass(String serializationClassName, @Nullable Map serializerConf) { - Preconditions.checkArgument(serializationClassName != null, - "serializationClassName cannot be null"); + Objects.requireNonNull(serializationClassName, "serializationClassName cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); if (serializerConf != null) { @@ -272,8 +271,7 @@ public Builder setKeySerializationClass(String serializationClassName, */ public Builder setValueSerializationClass(String serializationClassName, @Nullable Map serializerConf) { - Preconditions.checkArgument(serializationClassName != null, - "serializationClassName cannot be null"); + Objects.requireNonNull(serializationClassName, "serializationClassName cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); if (serializerConf != null) { diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedPartitionedKVEdgeConfig.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedPartitionedKVEdgeConfig.java index 52da491517..08cfa6e668 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedPartitionedKVEdgeConfig.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedPartitionedKVEdgeConfig.java @@ -23,8 +23,7 @@ import javax.annotation.Nullable; import java.util.Map; - -import com.google.common.base.Preconditions; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -151,7 +150,7 @@ public EdgeProperty createDefaultEdgeProperty() { * @return an {@link org.apache.tez.dag.api.EdgeProperty} instance */ public EdgeProperty createDefaultCustomEdgeProperty(EdgeManagerPluginDescriptor edgeManagerDescriptor) { - Preconditions.checkNotNull(edgeManagerDescriptor, "EdgeManagerDescriptor cannot be null"); + Objects.requireNonNull(edgeManagerDescriptor, "EdgeManagerDescriptor cannot be null"); EdgeProperty edgeProperty = EdgeProperty.create(edgeManagerDescriptor, EdgeProperty.DataSourceType.PERSISTED, EdgeProperty.SchedulingType.SEQUENTIAL, @@ -282,7 +281,7 @@ public UnorderedKVInputConfig.SpecificBuilder configureInput() { /** * Build and return an instance of the configuration - * @return an instance of the acatual configuration + * @return an instance of the actual configuration */ public UnorderedPartitionedKVEdgeConfig build() { return new UnorderedPartitionedKVEdgeConfig(outputBuilder.build(), inputBuilder.build()); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedPartitionedKVOutputConfig.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedPartitionedKVOutputConfig.java index 3555e1c3c0..dbbfb9ea6d 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedPartitionedKVOutputConfig.java +++ 
b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/UnorderedPartitionedKVOutputConfig.java @@ -24,9 +24,9 @@ import java.io.IOException; import java.util.Map; +import java.util.Objects; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.hadoop.classification.InterfaceAudience; @@ -183,9 +183,9 @@ public static class Builder implements SpecificConfigBuilder { Builder(String keyClassName, String valueClassName, String partitionerClassName, Map partitionerConf) { this(); - Preconditions.checkNotNull(keyClassName, "Key class name cannot be null"); - Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); - Preconditions.checkNotNull(partitionerClassName, "Partitioner class name cannot be null"); + Objects.requireNonNull(keyClassName, "Key class name cannot be null"); + Objects.requireNonNull(valueClassName, "Value class name cannot be null"); + Objects.requireNonNull(partitionerClassName, "Partitioner class name cannot be null"); setKeyClassName(keyClassName); setValueClassName(valueClassName); setPartitioner(partitionerClassName, partitionerConf); @@ -202,21 +202,21 @@ public static class Builder implements SpecificConfigBuilder { @InterfaceAudience.Private Builder setKeyClassName(String keyClassName) { - Preconditions.checkNotNull(keyClassName, "Key class name cannot be null"); + Objects.requireNonNull(keyClassName, "Key class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, keyClassName); return this; } @InterfaceAudience.Private Builder setValueClassName(String valueClassName) { - Preconditions.checkNotNull(valueClassName, "Value class name cannot be null"); + Objects.requireNonNull(valueClassName, "Value class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, valueClassName); return this; } @InterfaceAudience.Private Builder setPartitioner(String partitionerClassName, Map partitionerConf) { - Preconditions.checkNotNull(partitionerClassName, "Partitioner class name cannot be null"); + Objects.requireNonNull(partitionerClassName, "Partitioner class name cannot be null"); this.conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, partitionerClassName); if (partitionerConf != null) { // Merging the confs for now. Change to be specific in the future. 
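The hunks above (and the matching ones in UnorderedKVInputConfig and UnorderedKVOutputConfig) consistently replace Guava's Preconditions.checkNotNull/checkArgument null checks with java.util.Objects.requireNonNull and drop the Guava import; elsewhere in the patch the remaining checkArgument calls move to a Tez-local org.apache.tez.common.Preconditions. A minimal standalone sketch of the behavioral difference — the class and variable names here are illustrative, not part of the patch:

    import java.util.Objects;
    import com.google.common.base.Preconditions;

    public class NullCheckSketch {
      public static void main(String[] args) {
        String keyClassName = null;
        try {
          // Guava: a null check phrased as checkArgument throws IllegalArgumentException.
          Preconditions.checkArgument(keyClassName != null, "Key class name cannot be null");
        } catch (IllegalArgumentException e) {
          System.out.println("Guava checkArgument: " + e);
        }
        try {
          // JDK: requireNonNull throws NullPointerException, needs no third-party
          // dependency, and returns its argument, so it can be used inline in setters.
          Objects.requireNonNull(keyClassName, "Key class name cannot be null");
        } catch (NullPointerException e) {
          System.out.println("JDK requireNonNull: " + e);
        }
      }
    }

One consequence worth noting: call sites that previously used checkArgument for null checks now throw NullPointerException instead of IllegalArgumentException, so any caller that catches the latter would need adjusting.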
@@ -236,7 +236,7 @@ public Builder setAvailableBufferSize(int availableBufferSize) { @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(String key, String value) { - Preconditions.checkNotNull(key, "Key cannot be null"); + Objects.requireNonNull(key, "Key cannot be null"); if (ConfigUtils.doesKeyQualify(key, Lists.newArrayList(UnorderedPartitionedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), @@ -253,7 +253,7 @@ public Builder setAdditionalConfiguration(String key, String value) { @SuppressWarnings("unchecked") @Override public Builder setAdditionalConfiguration(Map confMap) { - Preconditions.checkNotNull(confMap, "ConfMap cannot be null"); + Objects.requireNonNull(confMap, "ConfMap cannot be null"); Map map = ConfigUtils.extractConfigurationMap(confMap, Lists.newArrayList(UnorderedPartitionedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); @@ -265,7 +265,7 @@ public Builder setAdditionalConfiguration(Map confMap) { @Override public Builder setFromConfiguration(Configuration conf) { // Maybe ensure this is the first call ? Otherwise this can end up overriding other parameters - Preconditions.checkArgument(conf != null, "Configuration cannot be null"); + Objects.requireNonNull(conf, "Configuration cannot be null"); Map map = ConfigUtils.extractConfigurationMap(conf, Lists.newArrayList(UnorderedPartitionedKVOutput.getConfigurationKeySet(), TezRuntimeConfiguration.getRuntimeAdditionalConfigKeySet()), TezRuntimeConfiguration.getAllowedPrefixes()); @@ -277,7 +277,7 @@ public Builder setFromConfiguration(Configuration conf) { @Override public Builder setFromConfigurationUnfiltered(Configuration conf) { // Maybe ensure this is the first call ? 
Otherwise this can end up overriding other parameters - Preconditions.checkArgument(conf != null, "Configuration cannot be null"); + Objects.requireNonNull(conf, "Configuration cannot be null"); ConfigUtils.mergeConfs(this.conf, conf); return this; } @@ -308,8 +308,7 @@ public Builder setCompression(boolean enabled, @Nullable String compressionCodec */ public Builder setKeySerializationClass(String serializationClassName, @Nullable Map serializerConf) { - Preconditions.checkArgument(serializationClassName != null, - "serializationClassName cannot be null"); + Objects.requireNonNull(serializationClassName, "serializationClassName cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); if (serializerConf != null) { @@ -330,8 +329,7 @@ public Builder setKeySerializationClass(String serializationClassName, */ public Builder setValueSerializationClass(String serializationClassName, @Nullable Map serializerConf) { - Preconditions.checkArgument(serializationClassName != null, - "serializationClassName cannot be null"); + Objects.requireNonNull(serializationClassName, "serializationClassName cannot be null"); this.conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, serializationClassName + "," + conf.get(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY)); if (serializerConf != null) { diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/Utils.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/Utils.java index c1b44a29d3..768ac6e8c1 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/Utils.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/conf/Utils.java @@ -22,7 +22,9 @@ import org.apache.tez.dag.api.EdgeProperty; @Private -class Utils { +final class Utils { + + private Utils() {} /** * Modify the EdgeProperty to set the history text if available diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedInputLegacy.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedInputLegacy.java index 6ae156a382..b697be5a1d 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedInputLegacy.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedInputLegacy.java @@ -57,6 +57,11 @@ public boolean next() throws IOException { return false; } + @Override + public boolean hasNext() throws IOException { + return false; + } + @Override public void close() throws IOException { } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java index 1cc6d4b787..1463cfabbe 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/OrderedGroupedKVInput.java @@ -28,6 +28,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import com.google.common.annotations.VisibleForTesting; +import org.apache.tez.common.TezUtils; import org.apache.tez.runtime.api.ProgressFailedException; import org.apache.tez.runtime.library.api.IOInterruptedException; import org.apache.tez.runtime.library.common.Constants; @@ -37,7 +38,6 @@ import 
org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.RawComparator; -import org.apache.tez.common.TezUtils; import org.apache.tez.common.TezRuntimeFrameworkConfigs; import org.apache.tez.common.counters.TaskCounter; import org.apache.tez.common.counters.TezCounter; @@ -54,7 +54,7 @@ import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle; import org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; /** @@ -97,14 +97,14 @@ public OrderedGroupedKVInput(InputContext inputContext, int numPhysicalInputs) { @Override public synchronized List initialize() throws IOException { - this.conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); + this.conf = TezUtils.createConfFromBaseConfAndPayload(getContext()); if (this.getNumPhysicalInputs() == 0) { getContext().requestInitialMemory(0l, null); isStarted.set(true); getContext().inputIsReady(); LOG.info("input fetch not required since there are 0 physical inputs for input vertex: " - + getContext().getSourceVertexName()); + + getContext().getInputOutputVertexNames()); return Collections.emptyList(); } @@ -130,9 +130,7 @@ public synchronized void start() throws IOException { // Start the shuffle - copy and merge shuffle = createShuffle(); shuffle.run(); - if (LOG.isDebugEnabled()) { - LOG.debug("Initialized the handlers in shuffle..Safe to start processing.."); - } + LOG.debug("Initialized the handlers in shuffle..Safe to start processing.."); List pending = new LinkedList(); pendingEvents.drainTo(pending); if (pending.size() > 0) { @@ -307,7 +305,7 @@ protected synchronized void createValuesIterator() RawComparator rawComparator = ConfigUtils.getIntermediateInputKeyComparator(conf); Class keyClass = ConfigUtils.getIntermediateInputKeyClass(conf); Class valClass = ConfigUtils.getIntermediateInputValueClass(conf); - LOG.info(getContext().getSourceVertexName() + ": " + "creating ValuesIterator with " + LOG.info(getContext().getInputOutputVertexNames() + ": " + "creating ValuesIterator with " + "comparator=" + rawComparator.getClass().getName() + ", keyClass=" + keyClass.getName() + ", valClass=" + valClass.getName()); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/UnorderedKVInput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/UnorderedKVInput.java index 6ba8936091..28914df6bc 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/UnorderedKVInput.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/input/UnorderedKVInput.java @@ -24,6 +24,7 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.tez.common.TezUtils; import org.apache.tez.common.TezUtilsInternal; import org.apache.tez.runtime.api.ProgressFailedException; import org.apache.tez.runtime.library.common.Constants; @@ -34,9 +35,6 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.DefaultCodec; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.tez.common.TezUtils; import org.apache.tez.common.TezRuntimeFrameworkConfigs; import org.apache.tez.common.counters.TaskCounter; import org.apache.tez.common.counters.TezCounter; @@ 
-46,19 +44,18 @@ import org.apache.tez.runtime.api.InputContext; import org.apache.tez.runtime.library.api.KeyValueReader; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; -import org.apache.tez.runtime.library.common.ConfigUtils; import org.apache.tez.runtime.library.common.MemoryUpdateCallbackHandler; import org.apache.tez.runtime.library.common.readers.UnorderedKVReader; import org.apache.tez.runtime.library.common.shuffle.ShuffleEventHandler; import org.apache.tez.runtime.library.common.shuffle.impl.ShuffleInputEventHandlerImpl; import org.apache.tez.runtime.library.common.shuffle.impl.ShuffleManager; import org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator; - -import com.google.common.base.Preconditions; +import org.apache.tez.runtime.library.utils.CodecUtils; +import org.apache.tez.common.Preconditions; /** * {@link UnorderedKVInput} provides unordered key value input by - * bringing together (shuffling) a set of distributed data and providing a + * bringing together (shuffling) a set of distributed data and providing a * unified view to that data. There are no ordering constraints applied by * this input. */ @@ -88,19 +85,19 @@ public UnorderedKVInput(InputContext inputContext, int numPhysicalInputs) { @Override public synchronized List initialize() throws Exception { Preconditions.checkArgument(getNumPhysicalInputs() != -1, "Number of Inputs has not been set"); - this.conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); + this.conf = TezUtils.createConfFromBaseConfAndPayload(getContext()); if (getNumPhysicalInputs() == 0) { getContext().requestInitialMemory(0l, null); isStarted.set(true); getContext().inputIsReady(); LOG.info("input fetch not required since there are 0 physical inputs for input vertex: " - + getContext().getSourceVertexName()); + + getContext().getInputOutputVertexNames()); return Collections.emptyList(); } else { - long initalMemReq = getInitialMemoryReq(); + long initialMemReq = getInitialMemoryReq(); memoryUpdateCallbackHandler = new MemoryUpdateCallbackHandler(); - this.getContext().requestInitialMemory(initalMemReq, memoryUpdateCallbackHandler); + this.getContext().requestInitialMemory(initialMemReq, memoryUpdateCallbackHandler); } this.conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, getContext().getWorkDirs()); @@ -114,14 +111,7 @@ public synchronized void start() throws IOException { if (!isStarted.get()) { ////// Initial configuration memoryUpdateCallbackHandler.validateUpdateReceived(); - CompressionCodec codec; - if (ConfigUtils.isIntermediateInputCompressed(conf)) { - Class codecClass = ConfigUtils - .getIntermediateInputCompressorClass(conf, DefaultCodec.class); - codec = ReflectionUtils.newInstance(codecClass, conf); - } else { - codec = null; - } + CompressionCodec codec = CodecUtils.getCodec(conf); boolean compositeFetch = ShuffleUtils.isTezShuffleHandler(conf); boolean ifileReadAhead = conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD, @@ -158,7 +148,7 @@ public synchronized void start() throws IOException { pendingEvents.drainTo(pending); if (pending.size() > 0) { if (LOG.isDebugEnabled()) { - LOG.debug(getContext().getSourceVertexName() + ": " + "NoAutoStart delay in processing first event: " + LOG.debug(getContext().getInputOutputVertexNames() + ": " + "NoAutoStart delay in processing first event: " + (System.currentTimeMillis() - firstEventReceivedTime)); } inputEventHandler.handleEvents(pending); @@ -225,7 +215,7 @@ public synchronized List close() 
throws Exception { if (this.shuffleManager != null) { this.shuffleManager.shutdown(); } - + long dataSize = getContext().getCounters() .findCounter(TaskCounter.SHUFFLE_BYTES_DECOMPRESSED).getValue(); getContext().getStatisticsReporter().reportDataSize(dataSize); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/OrderedPartitionedKVOutput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/OrderedPartitionedKVOutput.java index 7d3e0b4d51..44cb9d6aae 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/OrderedPartitionedKVOutput.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/OrderedPartitionedKVOutput.java @@ -30,15 +30,17 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; +import org.apache.tez.common.TezUtils; import org.apache.tez.runtime.library.conf.OrderedPartitionedKVOutputConfig.SorterImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezRuntimeFrameworkConfigs; -import org.apache.tez.common.TezUtils; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.runtime.api.AbstractLogicalOutput; import org.apache.tez.runtime.api.Event; @@ -53,7 +55,7 @@ import org.apache.tez.runtime.library.common.sort.impl.dflt.DefaultSorter; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; /** * {@link OrderedPartitionedKVOutput} is an {@link AbstractLogicalOutput} which sorts @@ -67,6 +69,7 @@ public class OrderedPartitionedKVOutput extends AbstractLogicalOutput { protected ExternalSorter sorter; protected Configuration conf; + private RawLocalFileSystem localFs; protected MemoryUpdateCallbackHandler memoryUpdateCallbackHandler; private long startTime; private long endTime; @@ -87,7 +90,9 @@ public OrderedPartitionedKVOutput(OutputContext outputContext, int numPhysicalOu @Override public synchronized List initialize() throws IOException { this.startTime = System.nanoTime(); - this.conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); + this.conf = TezUtils.createConfFromBaseConfAndPayload(getContext()); + this.localFs = (RawLocalFileSystem) FileSystem.getLocal(conf).getRaw(); + // Initializing this parameter in this conf since it is used in multiple // places (wherever LocalDirAllocator is used) - TezTaskOutputFiles, // TezMerger, etc.
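Two things change in the OrderedPartitionedKVOutput.initialize() hunk above: the Configuration is now built from the context's base configuration plus the user payload (createConfFromBaseConfAndPayload) rather than from the payload alone, and a RawLocalFileSystem is cached so TezSpillRecord can read the local index file through the raw (non-checksumming) filesystem instead of re-resolving one from the Configuration. A minimal standalone sketch of obtaining that raw filesystem, with only Hadoop on the classpath:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.RawLocalFileSystem;

    public class RawLocalFsSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // FileSystem.getLocal(conf) returns the checksumming LocalFileSystem wrapper;
        // getRaw() unwraps it to the underlying RawLocalFileSystem, which does not
        // create or verify .crc files when reading local spill/index data.
        RawLocalFileSystem localFs =
            (RawLocalFileSystem) FileSystem.getLocal(conf).getRaw();
        System.out.println(localFs.getUri());
      }
    }

The apparent rationale — spill index files are written and read on the same node, so the checksum layer adds I/O without much protection — is an inference from the change, not something the patch states.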
@@ -130,7 +135,7 @@ public synchronized void start() throws Exception { if (pipelinedShuffle) { if (finalMergeEnabled) { - LOG.info(getContext().getDestinationVertexName() + " disabling final merge as " + LOG.info(getContext().getInputOutputVertexNames() + " disabling final merge as " + TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED + " is enabled."); finalMergeEnabled = false; conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false); @@ -181,15 +186,15 @@ public synchronized void handleEvents(List outputEvents) { @Override public synchronized List close() throws IOException { - List returnEvents = null; + List returnEvents = Lists.newLinkedList(); if (sorter != null) { sorter.flush(); - sorter.close(); + returnEvents.addAll(sorter.close()); this.endTime = System.nanoTime(); - returnEvents = generateEvents(); + returnEvents.addAll(generateEvents()); sorter = null; } else { - LOG.warn(getContext().getDestinationVertexName() + + LOG.warn(getContext().getInputOutputVertexNames() + ": Attempting to close output {} of type {} before it was started. Generating empty events", getContext().getDestinationVertexName(), this.getClass().getSimpleName()); returnEvents = generateEmptyEvents(); @@ -205,7 +210,7 @@ private List generateEvents() throws IOException { String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT); ShuffleUtils.generateEventOnSpill(eventList, finalMergeEnabled, isLastEvent, - getContext(), 0, new TezSpillRecord(sorter.getFinalIndexFile(), conf), + getContext(), 0, new TezSpillRecord(sorter.getFinalIndexFile(), localFs), getNumPhysicalOutputs(), sendEmptyPartitionDetails, getContext().getUniqueIdentifier(), sorter.getPartitionStats(), sorter.reportDetailedPartitionStats(), auxiliaryService, deflater); } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/UnorderedKVOutput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/UnorderedKVOutput.java index c987024086..bcacc5238e 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/UnorderedKVOutput.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/UnorderedKVOutput.java @@ -25,6 +25,7 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.tez.common.TezUtils; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,7 +33,6 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.conf.Configuration; -import org.apache.tez.common.TezUtils; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezRuntimeFrameworkConfigs; import org.apache.tez.common.counters.TaskCounter; @@ -62,8 +62,9 @@ public class UnorderedKVOutput extends AbstractLogicalOutput { @VisibleForTesting UnorderedPartitionedKVWriter kvWriter; - - private Configuration conf; + + @VisibleForTesting + Configuration conf; private MemoryUpdateCallbackHandler memoryUpdateCallbackHandler; private final AtomicBoolean isStarted = new AtomicBoolean(false); @@ -76,7 +77,7 @@ public UnorderedKVOutput(OutputContext outputContext, int numPhysicalOutputs) { @Override public synchronized List initialize() throws Exception { - this.conf = 
TezUtils.createConfFromUserPayload(getContext().getUserPayload()); + this.conf = TezUtils.createConfFromBaseConfAndPayload(getContext()); this.conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, getContext().getWorkDirs()); @@ -105,7 +106,7 @@ public synchronized void start() throws Exception { this.kvWriter = new UnorderedPartitionedKVWriter(getContext(), conf, 1, memoryUpdateCallbackHandler.getMemoryAssigned()); isStarted.set(true); - LOG.info(getContext().getDestinationVertexName() + " started. MemoryAssigned=" + LOG.info(getContext().getInputOutputVertexNames() + " started. MemoryAssigned=" + memoryUpdateCallbackHandler.getMemoryAssigned()); } } @@ -129,7 +130,7 @@ public synchronized List close() throws Exception { returnEvents = kvWriter.close(); kvWriter = null; } else { - LOG.warn(getContext().getDestinationVertexName() + + LOG.warn(getContext().getInputOutputVertexNames() + ": Attempting to close output {} of type {} before it was started. Generating empty events", getContext().getDestinationVertexName(), this.getClass().getSimpleName()); returnEvents = new LinkedList(); @@ -167,6 +168,9 @@ String getHost() { confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS); confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC); confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED); + confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED); + confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE); + confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_SUPPORT_IN_MEM_FILE); confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT); confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED); confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/UnorderedPartitionedKVOutput.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/UnorderedPartitionedKVOutput.java index 94312f7530..9bc7ea40cd 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/UnorderedPartitionedKVOutput.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/output/UnorderedPartitionedKVOutput.java @@ -25,15 +25,16 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; +import com.google.common.annotations.VisibleForTesting; +import org.apache.tez.common.TezUtils; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.conf.Configuration; -import org.apache.tez.common.TezUtils; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezRuntimeFrameworkConfigs; import org.apache.tez.common.counters.TaskCounter; @@ -57,7 +58,8 @@ public class UnorderedPartitionedKVOutput extends AbstractLogicalOutput { private static final Logger LOG = LoggerFactory.getLogger(UnorderedPartitionedKVOutput.class); - private Configuration conf; + @VisibleForTesting + Configuration conf; private MemoryUpdateCallbackHandler memoryUpdateCallbackHandler; private UnorderedPartitionedKVWriter kvWriter; private final AtomicBoolean isStarted = new 
AtomicBoolean(false); @@ -68,7 +70,7 @@ public UnorderedPartitionedKVOutput(OutputContext outputContext, int numPhysical @Override public synchronized List initialize() throws Exception { - this.conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); + this.conf = TezUtils.createConfFromBaseConfAndPayload(getContext()); this.conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, getContext().getWorkDirs()); this.conf.setInt(TezRuntimeFrameworkConfigs.TEZ_RUNTIME_NUM_EXPECTED_PARTITIONS, getNumPhysicalOutputs()); @@ -106,7 +108,7 @@ public synchronized List close() throws Exception { returnEvents = kvWriter.close(); kvWriter = null; } else { - LOG.warn(getContext().getDestinationVertexName() + + LOG.warn(getContext().getInputOutputVertexNames() + ": Attempting to close output {} of type {} before it was started. Generating empty events", getContext().getDestinationVertexName(), this.getClass().getSimpleName()); returnEvents = new LinkedList(); @@ -140,6 +142,9 @@ public synchronized List close() throws Exception { confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS); confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC); confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED); + confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED); + confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE); + confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_SUPPORT_IN_MEM_FILE); confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT); confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED); confKeys.add(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/processor/SimpleProcessor.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/processor/SimpleProcessor.java index c237bc15e4..8c882f67c1 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/processor/SimpleProcessor.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/processor/SimpleProcessor.java @@ -35,7 +35,7 @@ /** * Implements an {@link AbstractLogicalIOProcessor} and provides empty * implementations of most methods and handles input/output initialization. - * This can be used to implement simple {@link Processor}s that dont need to + * This can be used to implement simple {@link Processor}s that don't need to * do event handling etc. */ @Public @@ -70,9 +70,9 @@ public void run(Map _inputs, Map _o public abstract void run() throws Exception; /** - * Implements input/output initialization. Can be overriden + * Implements input/output initialization. Can be overridden * to implement custom behavior. Called before {@link #run()} - * is called. + * is called.
* @throws Exception */ protected void preOp() throws Exception { @@ -90,7 +90,7 @@ protected void preOp() throws Exception { } /** - * Called after {@link #run()} is called and can be used to + * Called after {@link #run()} is called and can be used to * do post-processing like committing output etc * @throws Exception */ diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/processor/SleepProcessor.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/processor/SleepProcessor.java index 3efcd21c92..7750a13a63 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/processor/SleepProcessor.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/processor/SleepProcessor.java @@ -46,6 +46,7 @@ public class SleepProcessor extends AbstractLogicalIOProcessor { private static final Logger LOG = LoggerFactory.getLogger(SleepProcessor.class); + public static final String SLEEP_VERTEX_NAME = "Sleep"; private int timeToSleepMS; protected Map inputs; diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/resources/WeightedScalingMemoryDistributor.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/resources/WeightedScalingMemoryDistributor.java index c5b4fb0f92..52f3d44b4f 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/resources/WeightedScalingMemoryDistributor.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/resources/WeightedScalingMemoryDistributor.java @@ -41,7 +41,7 @@ import org.apache.tez.runtime.library.output.UnorderedPartitionedKVOutput; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -244,9 +244,7 @@ private RequestType getRequestTypeForClass(String className) { requestType = RequestType.PARTITIONED_UNSORTED_OUTPUT; } else { requestType = RequestType.OTHER; - if (LOG.isDebugEnabled()) { - LOG.debug("Falling back to RequestType.OTHER for class: " + className); - } + LOG.debug("Falling back to RequestType.OTHER for class: {}", className); } return requestType; } diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/BufferUtils.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/BufferUtils.java index a1685edcfa..9f31a09fa2 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/BufferUtils.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/BufferUtils.java @@ -25,7 +25,10 @@ import org.apache.hadoop.io.DataOutputBuffer; @Private -public class BufferUtils { +public final class BufferUtils { + + private BufferUtils() {} + public static int compare(DataInputBuffer buf1, DataInputBuffer buf2) { byte[] b1 = buf1.getData(); byte[] b2 = buf2.getData(); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/CodecUtils.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/CodecUtils.java new file mode 100644 index 0000000000..340ecceea8 --- /dev/null +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/CodecUtils.java @@ -0,0 +1,209 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.runtime.library.utils; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.io.compress.CodecPool; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; +import org.apache.tez.runtime.library.common.ConfigUtils; +import org.apache.tez.runtime.library.common.sort.impl.IFileInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + +public final class CodecUtils { + + private static final Logger LOG = LoggerFactory.getLogger(CodecUtils.class); + @VisibleForTesting + static final int DEFAULT_BUFFER_SIZE = 256 * 1024; + + private CodecUtils() { + } + + public static CompressionCodec getCodec(Configuration conf) throws IOException { + if (ConfigUtils.shouldCompressIntermediateOutput(conf)) { + Class codecClass = + ConfigUtils.getIntermediateOutputCompressorClass(conf, DefaultCodec.class); + CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf); + + if (codec != null) { + Class compressorType = null; + Throwable cause = null; + try { + compressorType = codec.getCompressorType(); + } catch (RuntimeException e) { + cause = e; + } + if (compressorType == null) { + String errMsg = String.format( + "Unable to get CompressorType for codec (%s). 
This is most + " likely due to missing native libraries for the codec.", + conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS_CODEC)); + throw new IOException(errMsg, cause); + } + } + return codec; + } else { + return null; + } + } + + public static InputStream getDecompressedInputStreamWithBufferSize(CompressionCodec codec, + IFileInputStream checksumIn, Decompressor decompressor, int compressedLength) + throws IOException { + String bufferSizeProp = getBufferSizeProperty(codec); + CompressionInputStream in = null; + + if (bufferSizeProp != null) { + Configurable configurableCodec = (Configurable) codec; + Configuration conf = configurableCodec.getConf(); + + synchronized (conf) { + int defaultBufferSize = getDefaultBufferSize(conf, codec); + int originalSize = conf.getInt(bufferSizeProp, defaultBufferSize); + + int newBufSize = Math.min(compressedLength, defaultBufferSize); + LOG.debug("buffer size was set according to min({}, {}) => {}={}", compressedLength, + defaultBufferSize, bufferSizeProp, newBufSize); + + conf.setInt(bufferSizeProp, newBufSize); + + in = codec.createInputStream(checksumIn, decompressor); + /* + * We should reset the original buffer size on the codec afterwards. The buffer size is + * used in two places. + * + * 1. It sets the buffer size of the input/output streams created by + * codec.createInputStream/codec.createOutputStream. This is something that can and should + * be tuned in config, as input streams instantiate and use their own buffers and won't + * reuse buffers from previous streams (TEZ-4135). + * + * 2. The same buffer size is used when a codec creates a new Compressor/Decompressor. The + * fundamental difference is that Compressor/Decompressor instances are expensive and are + * reused through hadoop's CodecPool. This hides a mismatch that can happen when a codec is + * created with a small buffer size config: once a Compressor/Decompressor instance has + * been created from that config field, it keeps being reused later, even when the + * application handles large amounts of data. We can then end up with large stream buffers + * plus small compressor/decompressor buffers, which is suboptimal and can even lead to + * strange errors when a compressed output exceeds the size of the buffer (TEZ-4234). + + * + * An interesting consequence is that - since the codec buffer size config affects both the + * compressor (output) and decompressor (input) paths - an altered codec config can cause + * the issues above for Compressor instances as well, even though we only meant to use a + * smaller buffer size on the decompression path.
+ */ + conf.setInt(bufferSizeProp, originalSize); + } + } else { + in = codec.createInputStream(checksumIn, decompressor); + } + + return in; + } + + public static Compressor getCompressor(CompressionCodec codec) { + synchronized (((Configurable) codec).getConf()) { + return CodecPool.getCompressor(codec); + } + } + + public static Decompressor getDecompressor(CompressionCodec codec) { + synchronized (((Configurable) codec).getConf()) { + return CodecPool.getDecompressor(codec); + } + } + + public static CompressionInputStream createInputStream(CompressionCodec codec, + InputStream checksumIn, Decompressor decompressor) throws IOException { + synchronized (((Configurable) codec).getConf()) { + return codec.createInputStream(checksumIn, decompressor); + } + } + + public static CompressionOutputStream createOutputStream(CompressionCodec codec, + OutputStream checksumOut, Compressor compressor) throws IOException { + synchronized (((Configurable) codec).getConf()) { + return codec.createOutputStream(checksumOut, compressor); + } + } + + public static String getBufferSizeProperty(CompressionCodec codec) { + return getBufferSizeProperty(codec.getClass().getName()); + } + + public static String getBufferSizeProperty(String codecClassName) { + switch (codecClassName) { + case "org.apache.hadoop.io.compress.DefaultCodec": + case "org.apache.hadoop.io.compress.BZip2Codec": + case "org.apache.hadoop.io.compress.GzipCodec": + return CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; + case "org.apache.hadoop.io.compress.SnappyCodec": + return CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY; + case "org.apache.hadoop.io.compress.ZStandardCodec": + return CommonConfigurationKeys.IO_COMPRESSION_CODEC_ZSTD_BUFFER_SIZE_KEY; + case "org.apache.hadoop.io.compress.LzoCodec": + case "com.hadoop.compression.lzo.LzoCodec": + return CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_KEY; + case "org.apache.hadoop.io.compress.Lz4Codec": + return CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY; + default: + return null; + } + } + + public static int getDefaultBufferSize(Configuration conf, CompressionCodec codec) { + return getDefaultBufferSize(conf, codec.getClass().getName()); + } + + public static int getDefaultBufferSize(Configuration conf, String codecClassName) { + switch (codecClassName) { + case "org.apache.hadoop.io.compress.DefaultCodec": + case "org.apache.hadoop.io.compress.BZip2Codec": + case "org.apache.hadoop.io.compress.GzipCodec": + return CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; + case "org.apache.hadoop.io.compress.SnappyCodec": + return CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT; + case "org.apache.hadoop.io.compress.ZStandardCodec": + return CommonConfigurationKeys.IO_COMPRESSION_CODEC_ZSTD_BUFFER_SIZE_DEFAULT; + case "org.apache.hadoop.io.compress.LzoCodec": + case "com.hadoop.compression.lzo.LzoCodec": + return CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_DEFAULT; + case "org.apache.hadoop.io.compress.Lz4Codec": + return CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT; + default: + return DEFAULT_BUFFER_SIZE; + } + } +} \ No newline at end of file diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/FastByteComparisons.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/FastByteComparisons.java index 4bd2552688..3743c2a2ff 100644 --- 
a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/FastByteComparisons.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/FastByteComparisons.java @@ -1,6 +1,6 @@ package org.apache.tez.runtime.library.utils; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -37,6 +37,8 @@ */ final class FastByteComparisons { + private FastByteComparisons() {} + /** * Lexicographically compare two byte arrays. */ @@ -48,8 +50,8 @@ public static int compareTo(byte[] b1, int s1, int l1, byte[] b2, int s2, private interface Comparer { - abstract public int compareTo(T buffer1, int offset1, int length1, - T buffer2, int offset2, int length2); + int compareTo(T buffer1, int offset1, int length1, + T buffer2, int offset2, int length2); } private static Comparer lexicographicalComparerJavaImpl() { @@ -131,12 +133,10 @@ public Object run() { Field f = Unsafe.class.getDeclaredField("theUnsafe"); f.setAccessible(true); return f.get(null); - } catch (NoSuchFieldException e) { + } catch (NoSuchFieldException | IllegalAccessException e) { // It doesn't matter what we throw; // it's swallowed in getBestComparer(). throw new Error(); - } catch (IllegalAccessException e) { - throw new Error(); } } }); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/Grouper.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/Grouper.java index b99f3d4745..84900f8f01 100644 --- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/Grouper.java +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/utils/Grouper.java @@ -17,7 +17,7 @@ */ package org.apache.tez.runtime.library.utils; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; /** * This grouper group specified number of items into specified number of groups. diff --git a/tez-runtime-library/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-runtime-library/src/main/javadoc/resources/META-INF/LICENSE similarity index 100% rename from tez-runtime-library/src/main/javadoc/resources/META-INF/LICENSE.txt rename to tez-runtime-library/src/main/javadoc/resources/META-INF/LICENSE diff --git a/tez-runtime-library/src/main/javadoc/resources/META-INF/NOTICE b/tez-runtime-library/src/main/javadoc/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-runtime-library/src/main/javadoc/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-runtime-library/src/main/javadoc/resources/META-INF/NOTICE.txt b/tez-runtime-library/src/main/javadoc/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-runtime-library/src/main/javadoc/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
- diff --git a/tez-runtime-library/src/main/proto/ShufflePayloads.proto b/tez-runtime-library/src/main/proto/ShufflePayloads.proto index 0a4f4a6147..5cbd18a9b0 100644 --- a/tez-runtime-library/src/main/proto/ShufflePayloads.proto +++ b/tez-runtime-library/src/main/proto/ShufflePayloads.proto @@ -26,15 +26,18 @@ message DataMovementEventPayloadProto { optional int32 port = 3; optional string path_component = 4; optional int32 run_duration = 5; + optional DataProto data = 6; optional bool pipelined = 7; // Related to pipelined shuffle optional bool last_event = 8; // Related to pipelined shuffle optional int32 spill_id = 9; // Related to pipelined shuffle. + optional int32 num_record = 10; } message DataProto { optional int32 raw_length = 1; optional int32 compressed_length = 2; optional bytes data = 3; + optional int32 uncompressed_length = 4; } message InputInformationEventPayloadProto { diff --git a/tez-runtime-library/src/main/resources/META-INF/LICENSE.txt b/tez-runtime-library/src/main/resources/META-INF/LICENSE similarity index 100% rename from tez-runtime-library/src/main/resources/META-INF/LICENSE.txt rename to tez-runtime-library/src/main/resources/META-INF/LICENSE diff --git a/tez-runtime-library/src/main/resources/META-INF/NOTICE b/tez-runtime-library/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-runtime-library/src/main/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-runtime-library/src/main/resources/META-INF/NOTICE.txt b/tez-runtime-library/src/main/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-runtime-library/src/main/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
- diff --git a/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestFairShuffleVertexManager.java b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestFairShuffleVertexManager.java index 61ca785ab6..5108b8f9e6 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestFairShuffleVertexManager.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestFairShuffleVertexManager.java @@ -28,7 +28,6 @@ import org.apache.tez.dag.api.InputDescriptor; import org.apache.tez.dag.api.OutputDescriptor; import org.apache.tez.dag.api.TezUncheckedException; -import org.apache.tez.dag.api.VertexLocationHint; import org.apache.tez.dag.api.VertexManagerPluginContext; import org.apache.tez.dag.api.event.VertexState; import org.apache.tez.dag.api.event.VertexStateUpdate; @@ -112,17 +111,22 @@ public void testInvalidSetup() { @Test(timeout = 5000) public void testReduceSchedulingWithPartitionStats() throws Exception { + final int numScatherAndGatherSourceTasks = 300; final Map newEdgeManagers = new HashMap(); - testSchedulingWithPartitionStats(FairRoutingType.REDUCE_PARALLELISM, - 2, 2, newEdgeManagers); + long[] partitionStats = new long[]{(MB), (2 * MB), (5 * MB)}; + testSchedulingWithPartitionStats( + FairRoutingType.REDUCE_PARALLELISM, numScatherAndGatherSourceTasks, + partitionStats, 2,2, 2, newEdgeManagers); EdgeManagerPluginOnDemand edgeManager = (EdgeManagerPluginOnDemand)newEdgeManagers.values().iterator().next(); // The first destination task fetches two partitions from all source tasks. - // 6 == 3 source tasks * 2 merged partitions - Assert.assertEquals(6, edgeManager.getNumDestinationTaskPhysicalInputs(0)); - for (int sourceTaskIndex = 0; sourceTaskIndex < 3; sourceTaskIndex++) { + // Thus the # of inputs == # of source tasks * 2 merged partitions + Assert.assertEquals(numScatherAndGatherSourceTasks * 2, + edgeManager.getNumDestinationTaskPhysicalInputs(0)); + for (int sourceTaskIndex = 0; + sourceTaskIndex < numScatherAndGatherSourceTasks; sourceTaskIndex++) { for (int j = 0; j < 2; j++) { if (j == 0) { EdgeManagerPluginOnDemand.CompositeEventRouteMetadata routeMetadata = @@ -144,19 +148,26 @@ public void testReduceSchedulingWithPartitionStats() throws Exception { @Test(timeout = 5000) public void testFairSchedulingWithPartitionStats() throws Exception { + final int numScatherAndGatherSourceTasks = 300; final Map newEdgeManagers = new HashMap(); - testSchedulingWithPartitionStats(FairRoutingType.FAIR_PARALLELISM, - 3, 2, newEdgeManagers); + long[] partitionStats = new long[]{(MB), (2 * MB), (5 * MB)}; + + testSchedulingWithPartitionStats( + FairRoutingType.FAIR_PARALLELISM, + numScatherAndGatherSourceTasks, partitionStats, + 2, 3, 2, newEdgeManagers); // Get the first edgeManager which is SCATTER_GATHER. EdgeManagerPluginOnDemand edgeManager = (EdgeManagerPluginOnDemand)newEdgeManagers.values().iterator().next(); // The first destination task fetches two partitions from all source tasks. 
- // 6 == 3 source tasks * 2 merged partitions - Assert.assertEquals(6, edgeManager.getNumDestinationTaskPhysicalInputs(0)); - for (int sourceTaskIndex = 0; sourceTaskIndex < 3; sourceTaskIndex++) { + // Thus the # of inputs == # of source tasks * 2 merged partitions + Assert.assertEquals(numScatherAndGatherSourceTasks * 2, + edgeManager.getNumDestinationTaskPhysicalInputs(0)); + for (int sourceTaskIndex = 0; sourceTaskIndex < numScatherAndGatherSourceTasks; + sourceTaskIndex++) { for (int j = 0; j < 2; j++) { if (j == 0) { EdgeManagerPluginOnDemand.CompositeEventRouteMetadata routeMetadata = @@ -175,9 +186,10 @@ public void testFairSchedulingWithPartitionStats() throws Exception { } } - // The 2nd destination task fetches one partition from the first source - // task. - Assert.assertEquals(1, edgeManager.getNumDestinationTaskPhysicalInputs(1)); + // The 2nd destination task fetches one partition from the first half of + // source tasks. + Assert.assertEquals(numScatherAndGatherSourceTasks / 2, + edgeManager.getNumDestinationTaskPhysicalInputs(1)); for (int j = 0; j < 2; j++) { if (j == 0) { EdgeManagerPluginOnDemand.CompositeEventRouteMetadata routeMetadata = @@ -193,33 +205,59 @@ public void testFairSchedulingWithPartitionStats() throws Exception { } } - // The 3rd destination task fetches one partition from the 2nd and 3rd - // source task. - Assert.assertEquals(2, edgeManager.getNumDestinationTaskPhysicalInputs(2)); - for (int sourceTaskIndex = 1; sourceTaskIndex < 3; sourceTaskIndex++) { + // The 3rd destination task fetches one partition from 2nd half of + // source tasks. + Assert.assertEquals(numScatherAndGatherSourceTasks / 2, + edgeManager.getNumDestinationTaskPhysicalInputs(2)); + for (int sourceTaskIndex = numScatherAndGatherSourceTasks / 2; + sourceTaskIndex < numScatherAndGatherSourceTasks; sourceTaskIndex++) { for (int j = 0; j < 2; j++) { if (j == 0) { EdgeManagerPluginOnDemand.CompositeEventRouteMetadata routeMetadata = edgeManager.routeCompositeDataMovementEventToDestination(sourceTaskIndex, 2); Assert.assertEquals(1, routeMetadata.getCount()); Assert.assertEquals(2, routeMetadata.getSource()); - Assert.assertEquals(sourceTaskIndex - 1, routeMetadata.getTarget()); + Assert.assertEquals( + sourceTaskIndex - numScatherAndGatherSourceTasks / 2, + routeMetadata.getTarget()); } else { EdgeManagerPluginOnDemand.EventRouteMetadata routeMetadata = edgeManager.routeInputSourceTaskFailedEventToDestination(sourceTaskIndex, 2); Assert.assertEquals(1, routeMetadata.getNumEvents()); - Assert.assertEquals(sourceTaskIndex - 1, routeMetadata.getTargetIndices()[0]); + Assert.assertEquals(sourceTaskIndex - numScatherAndGatherSourceTasks / 2, + routeMetadata.getTargetIndices()[0]); } } } } + @Test(timeout = 500000) + public void testOverflow() throws Exception { + final int numScatherAndGatherSourceTasks = 30000; + final Map newEdgeManagers = + new HashMap(); + final int firstPartitionSize = 1; + final int secondPartitionSize = 2; + final int thirdPartitionSize = 500; + long[] partitionStats = new long[]{(firstPartitionSize * MB), + (secondPartitionSize * MB), (thirdPartitionSize * MB)}; + final int expectedDestinationTasks = + (firstPartitionSize + secondPartitionSize + thirdPartitionSize) + * numScatherAndGatherSourceTasks / 1000; + + testSchedulingWithPartitionStats( + FairRoutingType.FAIR_PARALLELISM, + numScatherAndGatherSourceTasks, partitionStats, 1000, + expectedDestinationTasks, 3, newEdgeManagers); + } + // Create a DAG with one destination vertexes connected to 3 source vertexes. 
// There are 3 tasks for each vertex. One edge is of type SCATTER_GATHER. // The other edges are BROADCAST. private void testSchedulingWithPartitionStats( - FairRoutingType fairRoutingType, int expectedScheduledTasks, - int expectedNumDestinationConsumerTasks, + FairRoutingType fairRoutingType, int numTasks, long[] partitionStats, + int numCompletedEvents, + int expectedScheduledTasks, int expectedNumDestinationConsumerTasks, Map newEdgeManagers) throws Exception { Configuration conf = new Configuration(); @@ -227,7 +265,7 @@ private void testSchedulingWithPartitionStats( HashMap mockInputVertices = new HashMap(); String r1 = "R1"; - final int numOfTasksInr1 = 3; + final int numOfTasksInr1 = numTasks; EdgeProperty eProp1 = EdgeProperty.create( EdgeProperty.DataMovementType.SCATTER_GATHER, EdgeProperty.DataSourceType.PERSISTED, @@ -272,7 +310,7 @@ private void testSchedulingWithPartitionStats( doAnswer(new reconfigVertexAnswer(mockContext, mockManagedVertexId, newEdgeManagers)).when(mockContext).reconfigureVertex( - anyInt(), any(VertexLocationHint.class), anyMap()); + anyInt(), any(), anyMap()); // check initialization manager = createFairShuffleVertexManager(conf, mockContext, @@ -291,20 +329,16 @@ private void testSchedulingWithPartitionStats( manager.totalNumBipartiteSourceTasks); Assert.assertEquals(0, manager.numBipartiteSourceTasksCompleted); - //Send an event for r1. - manager.onSourceTaskCompleted(createTaskAttemptIdentifier(r1, 0)); Assert.assertTrue(manager.pendingTasks.size() == numOfTasksInDestination); // no tasks scheduled Assert.assertTrue(manager.totalNumBipartiteSourceTasks == numOfTasksInr1); - long[] sizes = new long[]{(50 * MB), (200 * MB), (500 * MB)}; - VertexManagerEvent vmEvent = getVertexManagerEvent(sizes, 800 * MB, - r1, true); - manager.onVertexManagerEventReceived(vmEvent); //send VM event - //stats from another task - sizes = new long[]{(60 * MB), (300 * MB), (600 * MB)}; - vmEvent = getVertexManagerEvent(sizes, 1200 * MB, r1, true); - manager.onVertexManagerEventReceived(vmEvent); //send VM event + for (int i = 0; i < numCompletedEvents; i++) { + VertexManagerEvent vmEvent = getVertexManagerEvent(partitionStats, 0, + r1, true); + manager.onSourceTaskCompleted(vmEvent.getProducerAttemptIdentifier()); + manager.onVertexManagerEventReceived(vmEvent); //send VM event + } //Send an event for m2. 
manager.onSourceTaskCompleted(createTaskAttemptIdentifier(m2, 0)); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestInputReadyVertexManager.java b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestInputReadyVertexManager.java index d59439e735..2eaaba4871 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestInputReadyVertexManager.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestInputReadyVertexManager.java @@ -30,7 +30,6 @@ import org.apache.tez.dag.api.InputDescriptor; import org.apache.tez.dag.api.OutputDescriptor; import org.apache.tez.dag.api.TezUncheckedException; -import org.apache.tez.dag.api.VertexLocationHint; import org.apache.tez.dag.api.VertexManagerPluginContext; import org.apache.tez.dag.api.EdgeProperty.SchedulingType; import org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest; @@ -275,8 +274,7 @@ public void testComplex() throws Exception { } catch (TezUncheckedException e) { e.getMessage().contains("1-1 source vertices must have identical concurrency"); } - verify(mockContext, times(1)).reconfigureVertex(anyInt(), (VertexLocationHint) any(), - anyMap()); // not invoked + verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(), any()); // not invoked when(mockContext.getVertexNumTasks(mockSrcVertexId3)).thenReturn(3); @@ -288,8 +286,7 @@ public void testComplex() throws Exception { manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED)); - verify(mockContext, times(1)).reconfigureVertex(anyInt(), (VertexLocationHint) any(), - anyMap()); // not invoked + verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(), any()); // not invoked verify(mockContext, times(2)).doneReconfiguringVertex(); manager.onVertexStarted(initialCompletions); // all 1-1 0's done but not scheduled because v1 is not done diff --git a/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManager.java b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManager.java index b824d0b908..fcbcb42ac4 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManager.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManager.java @@ -36,12 +36,12 @@ import java.util.List; import java.util.Map; -import static org.mockito.Matchers.eq; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyInt; import static org.mockito.Mockito.anyList; import static org.mockito.Mockito.anyMap; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -142,8 +142,8 @@ public void testLargeDataSize() throws IOException { manager.onVertexManagerEventReceived(vmEvent); manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 1)); // Auto-reduce is triggered - verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap()); - verify(mockContext, times(1)).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap()); + 
verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(), anyMap()); + verify(mockContext, times(1)).reconfigureVertex(eq(2), any(), anyMap()); Assert.assertEquals(2, newEdgeManagers.size()); Assert.assertEquals(0, manager.pendingTasks.size()); // all tasks scheduled Assert.assertEquals(2, scheduledTasks.size()); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManagerBase.java b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManagerBase.java index 96f46d60f1..75866248c3 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManagerBase.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManagerBase.java @@ -27,7 +27,6 @@ import org.apache.tez.dag.api.InputDescriptor; import org.apache.tez.dag.api.OutputDescriptor; import org.apache.tez.dag.api.TezUncheckedException; -import org.apache.tez.dag.api.VertexLocationHint; import org.apache.tez.dag.api.VertexManagerPluginContext; import org.apache.tez.dag.api.event.VertexState; import org.apache.tez.dag.api.event.VertexStateUpdate; @@ -114,8 +113,7 @@ public void testZeroSourceTasksWithVertexStartedFirst() { manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId3, VertexState.CONFIGURED)); Assert.assertTrue(manager.pendingTasks.isEmpty()); - verify(mockContext, times(1)).reconfigureVertex(eq(1), any - (VertexLocationHint.class), anyMap()); + verify(mockContext, times(1)).reconfigureVertex(eq(1), any(), anyMap()); verify(mockContext, times(1)).doneReconfiguringVertex(); // reconfig done Assert.assertTrue(scheduledTasks.size() == 1); // all tasks scheduled and parallelism changed scheduledTasks.clear(); @@ -153,8 +151,7 @@ public void testZeroSourceTasksWithVertexStateUpdatedFirst() { // trigger start and processing of pending notification events manager.onVertexStarted(emptyCompletions); Assert.assertTrue(manager.bipartiteSources == 2); - verify(mockContext, times(1)).reconfigureVertex(eq(1), any - (VertexLocationHint.class), anyMap()); + verify(mockContext, times(1)).reconfigureVertex(eq(1), any(), anyMap()); verify(mockContext, times(1)).doneReconfiguringVertex(); // reconfig done Assert.assertTrue(manager.pendingTasks.isEmpty()); Assert.assertTrue(scheduledTasks.size() == 1); // all tasks scheduled and parallelism changed @@ -210,7 +207,7 @@ public void testPartitionStats() throws IOException { //{5,9,12,18} in bitmap final long MB = 1024l * 1024l; long[] sizes = new long[]{(0l), (1 * MB), (964 * MB), (48 * MB)}; - VertexManagerEvent vmEvent = getVertexManagerEvent(sizes, 1L, "Vertex", false); + VertexManagerEvent vmEvent = getVertexManagerEvent(sizes, 0, "Vertex", false); manager = createManager(conf, mockContext, 0.01f, 0.75f); manager.onVertexStarted(emptyCompletions); @@ -239,7 +236,7 @@ public void testPartitionStats() throws IOException { Assert.assertEquals(10, manager.getCurrentlyKnownStatsAtIndex(3)); //10 MB bucket // Testing for detailed partition stats - vmEvent = getVertexManagerEvent(sizes, 1L, "Vertex", true); + vmEvent = getVertexManagerEvent(sizes, 0, "Vertex", true); manager = createManager(conf, mockContext, 0.01f, 0.75f); manager.onVertexStarted(emptyCompletions); @@ -322,8 +319,8 @@ public void testTez978() throws IOException { vmEvent = getVertexManagerEvent(null, 160 * MB, mockSrcVertexId2); 
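// NOTE (editorial, not part of the patch): the recurring matcher change in these
// hunks -- any(VertexLocationHint.class) becoming any() -- is forced by the Mockito
// upgrade: since Mockito 2, any(Class) behaves like isA() and no longer matches
// null arguments, while the untyped any() matches anything, null included.
// reconfigureVertex is routinely invoked with a null location hint, so the typed
// matcher would stop these verifications from matching:
//
//   verify(ctx).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap()); // misses null hints in Mockito 2+
//   verify(ctx).reconfigureVertex(eq(2), any(), anyMap());                         // matches null hints as well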
manager.onVertexManagerEventReceived(vmEvent); Assert.assertTrue(manager.determineParallelismAndApply(0.25f)); //ensure parallelism is determined - verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap()); - verify(mockContext, times(1)).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap()); + verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(), anyMap()); + verify(mockContext, times(1)).reconfigureVertex(eq(2), any(), anyMap()); manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 0)); Assert.assertEquals(0, manager.pendingTasks.size()); Assert.assertEquals(2, scheduledTasks.size()); @@ -340,7 +337,7 @@ public void testTez978() throws IOException { //min/max fraction of 0.0/0.2 manager = createManager(conf, mockContext, 0.0f, 0.2f); // initial invocation count == 3 - verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap()); + verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(), anyMap()); manager.onVertexStarted(emptyCompletions); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED)); manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED)); @@ -354,18 +351,18 @@ public void testTez978() throws IOException { manager.onVertexManagerEventReceived(getVertexManagerEvent(null, 10 * MB, mockSrcVertexId1)); manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId1, i)); //should not change parallelism - verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap()); + verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(), anyMap()); } for(int i=0;i<3;i++) { manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, i)); - verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap()); + verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(), anyMap()); } //Since max threshold (40 * 0.2 = 8) is met, vertex manager should determine parallelism manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 8)); // parallelism updated - verify(mockContext, times(2)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap()); + verify(mockContext, times(2)).reconfigureVertex(anyInt(), any(), anyMap()); // check exact update value - 8 events with 100 each => 20 -> 2000 => 2 tasks (with 1000 per task) - verify(mockContext, times(2)).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap()); + verify(mockContext, times(2)).reconfigureVertex(eq(2), any(), anyMap()); } @Test(timeout = 5000) @@ -418,8 +415,8 @@ public void testAutoParallelism() throws Exception { manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 1)); // managedVertex tasks reduced - verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap()); - verify(mockContext, times(1)).reconfigureVertex(eq(2), any(VertexLocationHint.class), anyMap()); + verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(), anyMap()); + verify(mockContext, times(1)).reconfigureVertex(eq(2), any(), anyMap()); Assert.assertEquals(2, newEdgeManagers.size()); // TODO improve tests for parallelism Assert.assertEquals(0, manager.pendingTasks.size()); // all tasks scheduled @@ -432,7 +429,7 @@ public void testAutoParallelism() throws Exception { // more completions dont cause recalculation of parallelism 
manager.onSourceTaskCompleted(createTaskAttemptIdentifier(mockSrcVertexId2, 0)); - verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), anyMap()); + verify(mockContext, times(1)).reconfigureVertex(anyInt(), any(), anyMap()); Assert.assertEquals(2, newEdgeManagers.size()); EdgeManagerPluginOnDemand edgeManager = @@ -493,7 +490,7 @@ public void testShuffleVertexManagerSlowStart() { String mockManagedVertexId = "Vertex4"; VertexManagerPluginContext mockContext = mock(VertexManagerPluginContext.class); - when(mockContext.getVertexStatistics(any(String.class))).thenReturn(mock(VertexStatistics.class)); + when(mockContext.getVertexStatistics(any())).thenReturn(mock(VertexStatistics.class)); when(mockContext.getInputVertexEdgeProperties()).thenReturn(mockInputVertices); when(mockContext.getVertexName()).thenReturn(mockManagedVertexId); when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(3); @@ -877,8 +874,7 @@ public void test_Tez1649_with_scatter_gather_edges() throws IOException { Assert.assertTrue(manager.totalNumBipartiteSourceTasks == 6); //Ensure that setVertexParallelism is not called for R2. - verify(mockContext_R2, times(0)).reconfigureVertex(anyInt(), any(VertexLocationHint.class), - anyMap()); + verify(mockContext_R2, times(0)).reconfigureVertex(anyInt(), any(), anyMap()); //ShuffleVertexManager's updatePendingTasks relies on getVertexNumTasks. Setting this for test when(mockContext_R2.getVertexNumTasks(mockManagedVertexId_R2)).thenReturn(1); @@ -886,9 +882,8 @@ public void test_Tez1649_with_scatter_gather_edges() throws IOException { // complete configuration of r1 triggers the scheduling manager.onVertexStateUpdated(new VertexStateUpdate(r1, VertexState.CONFIGURED)); Assert.assertTrue(manager.totalNumBipartiteSourceTasks == 9); - verify(mockContext_R2, times(1)).reconfigureVertex(eq(1), any(VertexLocationHint.class), - anyMap()); - + verify(mockContext_R2, times(1)).reconfigureVertex(eq(1), any(), anyMap()); + Assert.assertTrue(manager.pendingTasks.size() == 0); // all tasks scheduled Assert.assertTrue(scheduledTasks.size() == 1); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManagerUtils.java b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManagerUtils.java index 439d65011b..5d1509754b 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManagerUtils.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestShuffleVertexManagerUtils.java @@ -20,6 +20,7 @@ import com.google.protobuf.ByteString; +import java.util.Arrays; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.tez.common.ReflectionUtils; @@ -32,7 +33,6 @@ import org.apache.tez.dag.api.InputDescriptor; import org.apache.tez.dag.api.OutputDescriptor; import org.apache.tez.dag.api.UserPayload; -import org.apache.tez.dag.api.VertexLocationHint; import org.apache.tez.dag.api.VertexManagerPluginContext; import org.apache.tez.dag.library.vertexmanager.FairShuffleVertexManager.FairRoutingType; import org.apache.tez.dag.library.vertexmanager.FairShuffleVertexManager.FairShuffleVertexManagerConfigBuilder; @@ -56,10 +56,10 @@ import java.util.List; import java.util.Map; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyList; -import static org.mockito.Matchers.anyMap; 
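// NOTE (editorial, not part of the patch): org.mockito.Matchers was deprecated in
// Mockito 2 and removed in Mockito 3; the same matchers now live in
// org.mockito.ArgumentMatchers, and org.mockito.Mockito extends that class. That is
// why every import block in these hunks can switch mechanically:
//
//   import static org.mockito.Matchers.any;   // old home, gone in Mockito 3
//   import static org.mockito.Mockito.any;    // same matcher, supported home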
+import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyList; +import static org.mockito.Mockito.anyMap; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -113,29 +113,40 @@ VertexManagerPluginContext createVertexManagerContext( mockContext).scheduleTasks(anyList()); doAnswer(new reconfigVertexAnswer(mockContext, mockManagedVertexId, newEdgeManagers)).when(mockContext).reconfigureVertex( - anyInt(), any(VertexLocationHint.class), anyMap()); + anyInt(), any(), anyMap()); return mockContext; } VertexManagerEvent getVertexManagerEvent(long[] sizes, - long totalSize, String vertexName) throws IOException { - return getVertexManagerEvent(sizes, totalSize, vertexName, false); + long inputSize, String vertexName) throws IOException { + return getVertexManagerEvent(sizes, inputSize, vertexName, false); } - VertexManagerEvent getVertexManagerEvent(long[] sizes, - long totalSize, String vertexName, boolean reportDetailedStats) + VertexManagerEvent getVertexManagerEvent(long[] partitionSizes, + long uncompressedTotalSize, String vertexName, boolean reportDetailedStats) throws IOException { ByteBuffer payload; - if (sizes != null) { - RoaringBitmap partitionStats = ShuffleUtils.getPartitionStatsForPhysicalOutput(sizes); + final long totalSize; + // Use partition sizes to compute the total size. + if (partitionSizes != null) { + totalSize = Arrays.stream(partitionSizes).sum(); + } else { + totalSize = uncompressedTotalSize; + } + if (partitionSizes != null) { + RoaringBitmap partitionStats = + ShuffleUtils.getPartitionStatsForPhysicalOutput(partitionSizes); DataOutputBuffer dout = new DataOutputBuffer(); partitionStats.serialize(dout); ByteString - partitionStatsBytes = TezCommonUtils.compressByteArrayToByteString(dout.getData()); + partitionStatsBytes = TezCommonUtils.compressByteArrayToByteString( + dout.getData()); if (reportDetailedStats) { payload = VertexManagerEventPayloadProto.newBuilder() .setOutputSize(totalSize) - .setDetailedPartitionStats(ShuffleUtils.getDetailedPartitionStatsForPhysicalOutput(sizes)) + .setDetailedPartitionStats( + ShuffleUtils.getDetailedPartitionStatsForPhysicalOutput( + partitionSizes)) .build().toByteString() .asReadOnlyByteBuffer(); } else { diff --git a/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestVertexManagerWithConcurrentInput.java b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestVertexManagerWithConcurrentInput.java new file mode 100644 index 0000000000..619a4cdd1d --- /dev/null +++ b/tez-runtime-library/src/test/java/org/apache/tez/dag/library/vertexmanager/TestVertexManagerWithConcurrentInput.java @@ -0,0 +1,114 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.dag.library.vertexmanager;
+
+import org.apache.tez.dag.api.EdgeManagerPluginDescriptor;
+import org.apache.tez.dag.api.EdgeProperty;
+import org.apache.tez.dag.api.InputDescriptor;
+import org.apache.tez.dag.api.OutputDescriptor;
+import org.apache.tez.dag.api.VertexManagerPluginContext;
+import org.apache.tez.dag.api.VertexManagerPluginDescriptor;
+import org.apache.tez.dag.api.event.VertexState;
+import org.apache.tez.dag.api.event.VertexStateUpdate;
+import org.apache.tez.dag.library.edgemanager.SilentEdgeManager;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Captor;
+import org.mockito.MockitoAnnotations;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class TestVertexManagerWithConcurrentInput {
+
+  @Captor
+  ArgumentCaptor<List<VertexManagerPluginContext.ScheduleTaskRequest>> requestCaptor;
+
+  @Before
+  public void init() {
+    MockitoAnnotations.initMocks(this);
+  }
+
+  @Test(timeout = 5000)
+  public void testBasicVertexWithConcurrentInput() throws Exception {
+    HashMap<String, EdgeProperty> mockInputVertices =
+        new HashMap<String, EdgeProperty>();
+    String mockSrcVertexId1 = "Vertex1";
+    int srcVertex1Parallelism = 2;
+    EdgeProperty eProp1 = EdgeProperty.create(
+        EdgeManagerPluginDescriptor.create(SilentEdgeManager.class.getName()),
+        EdgeProperty.DataSourceType.EPHEMERAL,
+        EdgeProperty.SchedulingType.CONCURRENT,
+        OutputDescriptor.create("out"),
+        InputDescriptor.create("in"));
+
+    String mockSrcVertexId2 = "Vertex2";
+    int srcVertex2Parallelism = 3;
+    EdgeProperty eProp2 = EdgeProperty.create(
+        EdgeManagerPluginDescriptor.create(SilentEdgeManager.class.getName()),
+        EdgeProperty.DataSourceType.EPHEMERAL,
+        EdgeProperty.SchedulingType.CONCURRENT,
+        OutputDescriptor.create("out"),
+        InputDescriptor.create("in"));
+
+    String mockManagedVertexId = "Vertex";
+    int vertexParallelism = 2;
+
+    VertexManagerWithConcurrentInput.ConcurrentInputVertexManagerConfigBuilder configurer =
+        VertexManagerWithConcurrentInput.createConfigBuilder(null);
+    VertexManagerPluginDescriptor pluginDesc = configurer.build();
+
+    VertexManagerPluginContext mockContext = mock(VertexManagerPluginContext.class);
+    when(mockContext.getUserPayload()).thenReturn(pluginDesc.getUserPayload());
+    when(mockContext.getInputVertexEdgeProperties()).thenReturn(mockInputVertices);
+    when(mockContext.getVertexName()).thenReturn(mockManagedVertexId);
+    when(mockContext.getVertexNumTasks(mockManagedVertexId)).thenReturn(vertexParallelism);
+    when(mockContext.getVertexNumTasks(mockSrcVertexId1)).thenReturn(srcVertex1Parallelism);
+    when(mockContext.getVertexNumTasks(mockSrcVertexId2)).thenReturn(srcVertex2Parallelism);
+    mockInputVertices.put(mockSrcVertexId1, eProp1);
+    mockInputVertices.put(mockSrcVertexId2, eProp2);
+
+    VertexManagerWithConcurrentInput manager = new VertexManagerWithConcurrentInput(mockContext);
+    when(mockContext.getUserPayload()).thenReturn(pluginDesc.getUserPayload());
+    manager.initialize();
+    when(mockContext.getUserPayload()).thenReturn(pluginDesc.getUserPayload());
+
+    // source vertex 1 configured
+    manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId1, VertexState.CONFIGURED));
+    verify(mockContext, 
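// NOTE (editorial, not part of the patch): the @Captor field declared above is
// wired up by MockitoAnnotations.initMocks(this) in init(). Each
// scheduleTasks(requestCaptor.capture()) verification both checks the invocation
// count and records the actual argument, so the test could go further and inspect
// what was scheduled, e.g. (hypothetical follow-up assertion):
//
//   List<VertexManagerPluginContext.ScheduleTaskRequest> requests = requestCaptor.getValue();
//   Assert.assertEquals(vertexParallelism, requests.size());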
times(0)).scheduleTasks(requestCaptor.capture()); + + // source vertex 2 configured + manager.onVertexStateUpdated(new VertexStateUpdate(mockSrcVertexId2, VertexState.CONFIGURED)); + verify(mockContext, times(0)).scheduleTasks(requestCaptor.capture()); + + // then own vertex started + manager.onVertexStarted(Collections.singletonList( + TestShuffleVertexManager.createTaskAttemptIdentifier(mockSrcVertexId1, 0))); + verify(mockContext, times(1)).scheduleTasks(requestCaptor.capture()); + Assert.assertEquals(0, manager.completedUpstreamTasks); + } +} diff --git a/tez-runtime-library/src/test/java/org/apache/tez/http/TestHttpConnection.java b/tez-runtime-library/src/test/java/org/apache/tez/http/TestHttpConnection.java index ed4ed54c2e..afeb6e561f 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/http/TestHttpConnection.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/http/TestHttpConnection.java @@ -24,8 +24,6 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; import java.io.IOException; import java.net.ConnectException; @@ -42,7 +40,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; @@ -78,6 +76,7 @@ public Thread newThread(Runnable r) { }); url = new URL(NOT_HOSTED_URL); tokenSecretManager = mock(JobTokenSecretManager.class); + when(tokenSecretManager.computeHash(any())).thenReturn("1234".getBytes()); } @AfterClass @@ -89,7 +88,7 @@ public void baseTest(Callable worker, CountDownLatch latch, String message InterruptedException { long startTime = System.currentTimeMillis(); try { - Future future = executorService.submit(worker); + Future future = executorService.submit(worker); future.get(); } catch (ExecutionException e) { assertTrue(e.getCause().getCause() instanceof IOException); @@ -117,14 +116,13 @@ public void testConnectionTimeout() throws IOException, InterruptedException { } @Test(timeout = 20000) - @SuppressWarnings("unchecked") //Should be interruptible public void testAsyncHttpConnectionInterrupt() throws IOException, InterruptedException, ExecutionException { CountDownLatch latch = new CountDownLatch(1); HttpConnectionParams params = getConnectionParams(); AsyncHttpConnection asyncHttpConn = getAsyncHttpConnection(params); - Future future = executorService.submit(new Worker(latch, asyncHttpConn, true)); + Future future = executorService.submit(new Worker(latch, asyncHttpConn, true)); while(currentThread == null) { synchronized (this) { @@ -153,24 +151,14 @@ HttpConnectionParams getConnectionParams() { HttpConnection getHttpConnection(HttpConnectionParams params) throws IOException { HttpConnection realConn = new HttpConnection(url, params, "log", tokenSecretManager); HttpConnection connection = spy(realConn); - - doAnswer(new Answer() { - public Void answer(InvocationOnMock invocation) { - return null; - } - }).when(connection).computeEncHash(); + realConn.computeEncHash(); return connection; } AsyncHttpConnection getAsyncHttpConnection(HttpConnectionParams params) throws IOException { AsyncHttpConnection realConn = new AsyncHttpConnection(url, params, "log", tokenSecretManager); AsyncHttpConnection connection = spy(realConn); - - doAnswer(new Answer() { - public Void answer(InvocationOnMock invocation) { 
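// NOTE (editorial, not part of the patch): had this stub needed to stay, the
// pre-Java-8 anonymous class being deleted here could have been a lambda, since
// Mockito's Answer is a functional interface:
//
//   doAnswer(invocation -> null).when(connection).computeEncHash();
//
// The patch instead drops the stub entirely and lets the real computeEncHash()
// run against the JobTokenSecretManager mock stubbed in setup().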
- return null; - } - }).when(connection).computeEncHash(); + realConn.computeEncHash(); return connection; } diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/cartesianproduct/TestCartesianProductVertexManagerPartitioned.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/cartesianproduct/TestCartesianProductVertexManagerPartitioned.java index 1012a36fb4..a77e2c712e 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/cartesianproduct/TestCartesianProductVertexManagerPartitioned.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/cartesianproduct/TestCartesianProductVertexManagerPartitioned.java @@ -21,7 +21,6 @@ import org.apache.tez.dag.api.EdgeProperty; import org.apache.tez.dag.api.TezReflectionException; import org.apache.tez.dag.api.UserPayload; -import org.apache.tez.dag.api.VertexLocationHint; import org.apache.tez.dag.api.VertexManagerPluginContext; import org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest; import org.apache.tez.dag.api.event.VertexState; @@ -37,7 +36,6 @@ import org.junit.Test; import org.mockito.ArgumentCaptor; import org.mockito.Captor; -import org.mockito.Matchers; import org.mockito.MockitoAnnotations; import java.util.ArrayList; @@ -48,8 +46,9 @@ import static org.apache.tez.dag.api.EdgeProperty.DataMovementType.BROADCAST; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; -import static org.mockito.Matchers.eq; -import static org.mockito.Matchers.isNull; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.isNull; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; @@ -75,7 +74,7 @@ public void setup() throws TezReflectionException { private void setupWithConfig(CartesianProductConfigProto config) throws TezReflectionException { - MockitoAnnotations.initMocks(this); + MockitoAnnotations.openMocks(this); context = mock(VertexManagerPluginContext.class); when(context.getVertexName()).thenReturn("cp"); when(context.getVertexNumTasks("cp")).thenReturn(-1); @@ -110,7 +109,7 @@ private void testReconfigureVertexHelper(CartesianProductConfigProto config, vertexManager.onVertexStateUpdated(new VertexStateUpdate("v0", VertexState.CONFIGURED)); verify(context, times(1)).reconfigureVertex(parallelismCaptor.capture(), - isNull(VertexLocationHint.class), edgePropertiesCaptor.capture()); + isNull(), edgePropertiesCaptor.capture()); assertEquals((int)parallelismCaptor.getValue(), parallelism); assertNull(edgePropertiesCaptor.getValue()); } @@ -134,12 +133,12 @@ public void testScheduling() throws Exception { vertexManager.onSourceTaskCompleted(allCompletions.get(0)); vertexManager.onSourceTaskCompleted(allCompletions.get(1)); - verify(context, never()).scheduleTasks(Matchers.>any()); + verify(context, never()).scheduleTasks(any()); List scheduleTaskRequests; vertexManager.onSourceTaskCompleted(allCompletions.get(2)); // shouldn't start schedule because broadcast src is not in RUNNING state - verify(context, never()).scheduleTasks(Matchers.>any()); + verify(context, never()).scheduleTasks(any()); vertexManager.onVertexStateUpdated(new VertexStateUpdate("v2", VertexState.RUNNING)); verify(context, times(1)).scheduleTasks(scheduleTaskRequestCaptor.capture()); @@ -161,7 +160,7 @@ public void testScheduling() throws Exception { for (int i = 6; i < 8; i++) { 
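// NOTE (editorial, not part of the patch): the initMocks -> openMocks switch above
// follows Mockito 3.4, which deprecated initMocks. openMocks returns an
// AutoCloseable so the annotated mocks can be released after each test:
//
//   private AutoCloseable mocks;
//
//   @Before
//   public void setup() { mocks = MockitoAnnotations.openMocks(this); }
//
//   @After
//   public void tearDown() throws Exception { mocks.close(); }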
vertexManager.onSourceTaskCompleted(allCompletions.get(i)); - verify(context, times(4)).scheduleTasks(Matchers.>any()); + verify(context, times(4)).scheduleTasks(any()); } } @@ -191,7 +190,7 @@ private void testOnVertexStartHelper(boolean broadcastRunning) throws Exception vertexManager.onVertexStarted(completions); if (!broadcastRunning) { - verify(context, never()).scheduleTasks(Matchers.>any()); + verify(context, never()).scheduleTasks(any()); vertexManager.onVertexStateUpdated(new VertexStateUpdate("v2", VertexState.RUNNING)); } diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/cartesianproduct/TestFairCartesianProductVertexManager.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/cartesianproduct/TestFairCartesianProductVertexManager.java index 6219a158d0..54d771ad08 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/cartesianproduct/TestFairCartesianProductVertexManager.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/cartesianproduct/TestFairCartesianProductVertexManager.java @@ -22,7 +22,6 @@ import com.google.protobuf.InvalidProtocolBufferException; import org.apache.tez.dag.api.EdgeManagerPluginDescriptor; import org.apache.tez.dag.api.EdgeProperty; -import org.apache.tez.dag.api.VertexLocationHint; import org.apache.tez.dag.api.VertexManagerPluginContext; import org.apache.tez.dag.api.VertexManagerPluginContext.ScheduleTaskRequest; import org.apache.tez.dag.api.event.VertexState; @@ -52,10 +51,10 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyMapOf; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyMap; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; @@ -72,7 +71,7 @@ public class TestFairCartesianProductVertexManager { @Before public void setup() { - MockitoAnnotations.initMocks(this); + MockitoAnnotations.openMocks(this); ctx = mock(VertexManagerPluginContext.class); vertexManager = new FairCartesianProductVertexManager(ctx); } @@ -224,11 +223,11 @@ public void testDAGVertexOnlyGroupByMaxParallelism() throws Exception { vertexManager.onVertexManagerEventReceived(getVMEvent(250, "v0", 0)); verify(ctx, never()).reconfigureVertex( - anyInt(), any(VertexLocationHint.class), anyMapOf(String.class, EdgeProperty.class)); + anyInt(), any(), anyMap()); vertexManager.onVertexManagerEventReceived(getVMEvent(200, "v1", 0)); verify(ctx, times(1)).reconfigureVertex( - eq(30), any(VertexLocationHint.class), edgePropertiesCaptor.capture()); + eq(30), any(), edgePropertiesCaptor.capture()); Map edgeProperties = edgePropertiesCaptor.getValue(); verifyEdgeProperties(edgeProperties.get("v0"), new String[]{"v0", "v1"}, new int[]{5, 6}, 30); verifyVertexGroupInfo(edgeProperties.get("v0"), 0); @@ -259,7 +258,7 @@ public void testDAGVertexOnlyGroupByMinOpsPerWorker() throws Exception { } verify(ctx, times(1)).reconfigureVertex( - eq(12), any(VertexLocationHint.class), edgePropertiesCaptor.capture()); + eq(12), any(), edgePropertiesCaptor.capture()); Map edgeProperties = edgePropertiesCaptor.getValue(); verifyEdgeProperties(edgeProperties.get("v0"), new 
String[]{"v0", "v1"}, new int[]{4, 3}, 100); verifyEdgeProperties(edgeProperties.get("v1"), new String[]{"v0", "v1"}, new int[]{4, 3}, 100); @@ -289,7 +288,7 @@ public void testDAGVertexGroup() throws Exception { vertexManager.onVertexManagerEventReceived(getVMEvent(5, "v2", 0)); vertexManager.onVertexManagerEventReceived(getVMEvent(5, "v2", 1)); verify(ctx, times(1)).reconfigureVertex( - eq(100), any(VertexLocationHint.class), edgePropertiesCaptor.capture()); + eq(100), any(), edgePropertiesCaptor.capture()); Map edgeProperties = edgePropertiesCaptor.getValue(); for (int i = 0; i < 3; i++) { verifyEdgeProperties(edgeProperties.get("v" + i), new String[]{"v0", "g0"}, @@ -323,7 +322,7 @@ public void testDAGVertexGroupOnly() throws Exception { vertexManager.onVertexManagerEventReceived(getVMEvent(16, "v3", 0)); verify(ctx, times(1)).reconfigureVertex( - eq(100), any(VertexLocationHint.class), edgePropertiesCaptor.capture()); + eq(100), any(), edgePropertiesCaptor.capture()); Map edgeProperties = edgePropertiesCaptor.getValue(); for (int i = 0; i < 4; i++) { verifyEdgeProperties(edgeProperties.get("v" + i), new String[]{"g0", "g1"}, @@ -352,7 +351,7 @@ public void testSchedulingVertexOnlyWithBroadcast() throws Exception { vertexManager.onVertexManagerEventReceived(getVMEvent(250, "v0", 0)); vertexManager.onVertexManagerEventReceived(getVMEvent(200, "v1", 0)); verify(ctx, times(1)).reconfigureVertex( - eq(30), any(VertexLocationHint.class), edgePropertiesCaptor.capture()); + eq(30), any(), edgePropertiesCaptor.capture()); assertFalse(edgePropertiesCaptor.getValue().containsKey("v2")); vertexManager.onVertexStarted(null); @@ -400,7 +399,7 @@ public void testZeroSrcTask() throws Exception { vertexManager.initialize(config); vertexManager.onVertexStateUpdated(new VertexStateUpdate("v0", VertexState.CONFIGURED)); vertexManager.onVertexStateUpdated(new VertexStateUpdate("v1", VertexState.CONFIGURED)); - vertexManager.onVertexStarted(new ArrayList()); + vertexManager.onVertexStarted(new ArrayList<>()); vertexManager.onSourceTaskCompleted(getTaId("v0", 0)); vertexManager.onSourceTaskCompleted(getTaId("v0", 1)); } @@ -430,11 +429,11 @@ public void testGroupingFraction() throws Exception { vertexManager.onSourceTaskCompleted(getTaId("v1", i)); } verify(ctx, never()).reconfigureVertex( - anyInt(), any(VertexLocationHint.class), anyMapOf(String.class, EdgeProperty.class)); + anyInt(), any(), anyMap()); vertexManager.onSourceTaskCompleted(getTaId("v1", 14)); verify(ctx, times(1)).reconfigureVertex( - eq(24), any(VertexLocationHint.class), edgePropertiesCaptor.capture()); + eq(24), any(), edgePropertiesCaptor.capture()); } @Test(timeout = 5000) @@ -448,7 +447,7 @@ public void testGroupFractionWithZeroStats() throws Exception { vertexManager.onSourceTaskCompleted(getTaId("v1", i)); } verify(ctx, never()).reconfigureVertex( - anyInt(), any(VertexLocationHint.class), anyMapOf(String.class, EdgeProperty.class)); + anyInt(), any(), anyMap()); } @Test(timeout = 5000) @@ -462,7 +461,7 @@ public void testGroupingFractionWithZeroOutput() throws Exception { vertexManager.onSourceTaskCompleted(getTaId("v1", i)); } verify(ctx, times(1)).reconfigureVertex( - eq(0), any(VertexLocationHint.class), edgePropertiesCaptor.capture()); + eq(0), any(), edgePropertiesCaptor.capture()); } @Test(timeout = 5000) @@ -476,7 +475,7 @@ public void testZeroSrcOutput() throws Exception { vertexManager.onVertexManagerEventReceived(getVMEvent(0, "v1", 1)); vertexManager.onVertexManagerEventReceived(getVMEvent(0, "v1", 2)); verify(ctx, 
times(1)).reconfigureVertex( - eq(0), any(VertexLocationHint.class), edgePropertiesCaptor.capture()); + eq(0), any(), edgePropertiesCaptor.capture()); } @Test(timeout = 5000) @@ -495,7 +494,7 @@ public void testDisableGrouping() throws Exception { vertexManager.onVertexManagerEventReceived(getVMEvent(250, "v0", 0)); vertexManager.onVertexManagerEventReceived(getVMEvent(200, "v1", 0)); verify(ctx, times(1)).reconfigureVertex( - eq(6), any(VertexLocationHint.class), edgePropertiesCaptor.capture()); + eq(6), any(), edgePropertiesCaptor.capture()); } @Test(timeout = 5000) @@ -511,7 +510,7 @@ public void testParallelismTwoSkewedSource() throws Exception { } verify(ctx, times(1)).reconfigureVertex( - eq(99), any(VertexLocationHint.class), edgePropertiesCaptor.capture()); + eq(99), any(), edgePropertiesCaptor.capture()); Map edgeProperties = edgePropertiesCaptor.getValue(); verifyEdgeProperties(edgeProperties.get("v0"), new String[]{"v0", "v1"}, new int[]{99, 1}, 100); @@ -539,9 +538,9 @@ public void testParallelismThreeSkewedSource() throws Exception { } verify(ctx, times(1)).reconfigureVertex( - eq(93), any(VertexLocationHint.class), edgePropertiesCaptor.capture()); + eq(93), any(), edgePropertiesCaptor.capture()); Map edgeProperties = edgePropertiesCaptor.getValue(); verifyEdgeProperties(edgeProperties.get("v0"), new String[]{"v0", "v1", "v2"}, new int[]{31, 3, 1}, 100); } -} \ No newline at end of file +} diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/TestConfigUtils.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/TestConfigUtils.java new file mode 100644 index 0000000000..24f76cc832 --- /dev/null +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/TestConfigUtils.java @@ -0,0 +1,85 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.tez.runtime.library.common;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;
+import org.junit.Test;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+
+import static org.junit.Assert.assertEquals;
+
+
+public class TestConfigUtils {
+
+  private static class CustomKey implements WritableComparable<CustomKey>, Configurable {
+
+    private Configuration conf;
+
+    @Override
+    public int compareTo(CustomKey o) {
+      return 0;
+    }
+
+    @Override
+    public void write(DataOutput out) {
+
+    }
+
+    @Override
+    public void readFields(DataInput in) {
+
+    }
+
+    @Override
+    public void setConf(Configuration conf) {
+      this.conf = conf;
+    }
+
+    @Override
+    public Configuration getConf() {
+      return conf;
+    }
+  }
+
+  @Test
+  public void getIntermediateOutputKeyComparator() {
+    Configuration conf = new Configuration();
+    String testKey = "test_flag_name";
+    String testValue = "tez";
+    conf.set(testKey, testValue);
+    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, CustomKey.class.getName());
+    WritableComparator rawComparator = (WritableComparator) ConfigUtils.getIntermediateOutputKeyComparator(conf);
+    CustomKey customKey = (CustomKey) rawComparator.newKey();
+    assertEquals(testValue, customKey.getConf().get(testKey));
+  }
+
+  @Test
+  public void getIntermediateInputKeyComparator() {
+    Configuration conf = new Configuration();
+    String testKey = "test_flag_name";
+    String testValue = "tez";
+    conf.set(testKey, testValue);
+    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, CustomKey.class.getName());
+    WritableComparator rawComparator = (WritableComparator) ConfigUtils.getIntermediateInputKeyComparator(conf);
+    CustomKey customKey = (CustomKey) rawComparator.newKey();
+    assertEquals(testValue, customKey.getConf().get(testKey));
+  }
+}
\ No newline at end of file
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/TestInputIdentifiers.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/TestInputIdentifiers.java
index 6b82a9d27d..5eb3b5030a 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/TestInputIdentifiers.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/TestInputIdentifiers.java
@@ -41,4 +41,23 @@ public void testInputAttemptIdentifier() {
     Assert.assertTrue(set.add(i4));
   }
 
+  @Test(timeout = 5000)
+  public void testInputAttemptIdentifierIncludes() {
+    InputAttemptIdentifier inputData0Attempt0 = new InputAttemptIdentifier(0, 0);
+    InputAttemptIdentifier inputData1Attempt0 = new InputAttemptIdentifier(1, 0);
+    InputAttemptIdentifier inputData2Attempt0 = new InputAttemptIdentifier(2, 0);
+    InputAttemptIdentifier inputData3Attempt0 = new InputAttemptIdentifier(3, 0);
+    InputAttemptIdentifier inputData1Attempt1 = new InputAttemptIdentifier(1, 1);
+    CompositeInputAttemptIdentifier inputData12Attempt0 = new CompositeInputAttemptIdentifier(1, 0, null, 2);
+
+    Assert.assertTrue(inputData1Attempt0.includes(inputData1Attempt0));
+    Assert.assertFalse(inputData1Attempt0.includes(inputData2Attempt0));
+    Assert.assertFalse(inputData1Attempt0.includes(inputData1Attempt1));
+
+    Assert.assertFalse(inputData12Attempt0.includes(inputData0Attempt0));
+    Assert.assertTrue(inputData12Attempt0.includes(inputData1Attempt0));
+    
Assert.assertTrue(inputData12Attempt0.includes(inputData2Attempt0)); + Assert.assertFalse(inputData12Attempt0.includes(inputData3Attempt0)); + Assert.assertFalse(inputData12Attempt0.includes(inputData1Attempt1)); + } } diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/TestValuesIterator.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/TestValuesIterator.java index 642f02b569..eaded181b9 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/TestValuesIterator.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/TestValuesIterator.java @@ -6,6 +6,7 @@ import com.google.common.collect.Lists; import java.nio.ByteBuffer; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -36,6 +37,7 @@ import org.apache.tez.runtime.api.InputContext; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.common.comparator.TezBytesComparator; +import org.apache.tez.runtime.library.common.serializer.SerializationContext; import org.apache.tez.runtime.library.common.serializer.TezBytesWritableSerialization; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter; @@ -102,8 +104,7 @@ enum TestWithComparator { FileSystem fs; static final Random rnd = new Random(); - final Class keyClass; - final Class valClass; + private SerializationContext serializationContext; final RawComparator comparator; final RawComparator correctComparator; final boolean expectedTestResult; @@ -129,20 +130,18 @@ enum TestWithComparator { * @param testResult expected result * @throws IOException */ - public TestValuesIterator(String serializationClassName, Class key, Class val, + public TestValuesIterator(String serializationClassName, Class key, Class val, TestWithComparator comparator, TestWithComparator correctComparator, boolean testResult) throws IOException { - this.keyClass = key; - this.valClass = val; this.comparator = getComparator(comparator); this.correctComparator = (correctComparator == null) ? 
this.comparator : getComparator(correctComparator); this.expectedTestResult = testResult; originalData = LinkedListMultimap.create(); - setupConf(serializationClassName); + setupConf(key, val, serializationClassName); } - private void setupConf(String serializationClassName) throws IOException { + private void setupConf(Class key, Class val, String serializationClassName) throws IOException { mergeFactor = 2; conf = new Configuration(); conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, mergeFactor); @@ -154,6 +153,11 @@ private void setupConf(String serializationClassName) throws IOException { String localDirs = baseDir.toString(); conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDirs); fs = FileSystem.getLocal(conf); + + SerializationFactory serializationFactory = new SerializationFactory(conf); + serializationContext = new SerializationContext(key, val, + serializationFactory.getSerialization(key), serializationFactory.getSerialization(val)); + serializationContext.applyToConf(conf); } @Before @@ -231,20 +235,21 @@ private ValuesIterator createEmptyIterator(boolean inMemory) streamPaths = new Path[0]; //This will return EmptyIterator rawKeyValueIterator = - TezMerger.merge(conf, fs, keyClass, valClass, null, + TezMerger.merge(conf, fs, serializationContext, null, false, -1, 1024, streamPaths, false, mergeFactor, tmpDir, comparator, new ProgressReporter(), null, null, null, null); } else { List segments = Lists.newLinkedList(); //This will return EmptyIterator rawKeyValueIterator = - TezMerger.merge(conf, fs, keyClass, valClass, segments, mergeFactor, tmpDir, + TezMerger.merge(conf, fs, serializationContext, segments, mergeFactor, tmpDir, comparator, new ProgressReporter(), new GenericCounter("readsCounter", "y"), new GenericCounter("writesCounter", "y1"), new GenericCounter("bytesReadCounter", "y2"), new Progress()); } return new ValuesIterator(rawKeyValueIterator, comparator, - keyClass, valClass, conf, (TezCounter) new GenericCounter("inputKeyCounter", "y3"), + serializationContext.getKeyClass(), serializationContext.getValueClass(), conf, + (TezCounter) new GenericCounter("inputKeyCounter", "y3"), (TezCounter) new GenericCounter("inputValueCounter", "y4")); } @@ -332,19 +337,20 @@ private ValuesIterator createIterator(boolean inMemory) throws IOException, Inte streamPaths = createFiles(); //Merge all files to get KeyValueIterator rawKeyValueIterator = - TezMerger.merge(conf, fs, keyClass, valClass, null, + TezMerger.merge(conf, fs, serializationContext, null, false, -1, 1024, streamPaths, false, mergeFactor, tmpDir, comparator, new ProgressReporter(), null, null, null, null); } else { List segments = createInMemStreams(); rawKeyValueIterator = - TezMerger.merge(conf, fs, keyClass, valClass, segments, mergeFactor, tmpDir, + TezMerger.merge(conf, fs, serializationContext, segments, mergeFactor, tmpDir, comparator, new ProgressReporter(), new GenericCounter("readsCounter", "y"), new GenericCounter("writesCounter", "y1"), new GenericCounter("bytesReadCounter", "y2"), new Progress()); } - return new ValuesIterator(rawKeyValueIterator, comparator, - keyClass, valClass, conf, (TezCounter) new GenericCounter("inputKeyCounter", "y3"), + return new ValuesIterator(rawKeyValueIterator, comparator, serializationContext.getKeyClass(), + serializationContext.getValueClass(), conf, + (TezCounter) new GenericCounter("inputKeyCounter", "y3"), (TezCounter) new GenericCounter("inputValueCounter", "y4")); } @@ -364,19 +370,19 @@ private ValuesIterator createCountedIterator(boolean 
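// NOTE (editorial, not part of the patch): the theme of this file's changes is
// replacing the (keyClass, valClass) pairs threaded through TezMerger.merge and
// ValuesIterator with a single SerializationContext. From the constructor call in
// setupConf and the accessors used in these hunks, the holder must carry at least
// the following (a sketch; member names are taken from the call sites above,
// everything else about the real class is assumed):
//
//   class SerializationContext {
//     private final Class<?> keyClass;
//     private final Class<?> valueClass;
//     private final Serialization<?> keySerialization;
//     private final Serialization<?> valSerialization;
//     // plus getKeyClass()/getValueClass(), getKeySerialization()/
//     // getValSerialization(), getKeySerializer()/getValueSerializer(),
//     // and applyToConf(Configuration), as used throughout this diff.
//   }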
inMemory, TezCounter keyCou streamPaths = createFiles(); //Merge all files to get KeyValueIterator rawKeyValueIterator = - TezMerger.merge(conf, fs, keyClass, valClass, null, + TezMerger.merge(conf, fs, serializationContext, null, false, -1, 1024, streamPaths, false, mergeFactor, tmpDir, comparator, new ProgressReporter(), null, null, null, null); } else { List segments = createInMemStreams(); rawKeyValueIterator = - TezMerger.merge(conf, fs, keyClass, valClass, segments, mergeFactor, tmpDir, + TezMerger.merge(conf, fs, serializationContext, segments, mergeFactor, tmpDir, comparator, new ProgressReporter(), new GenericCounter("readsCounter", "y"), new GenericCounter("writesCounter", "y1"), new GenericCounter("bytesReadCounter", "y2"), new Progress()); } - return new ValuesIterator(rawKeyValueIterator, comparator, - keyClass, valClass, conf, keyCounter, tupleCounter); + return new ValuesIterator(rawKeyValueIterator, comparator, serializationContext.getKeyClass(), + serializationContext.getValueClass(), conf, keyCounter, tupleCounter); } @Parameterized.Parameters(name = "test[{0}, {1}, {2}, {3} {4} {5} {6}]") @@ -454,7 +460,9 @@ private Path[] createFiles() throws IOException { paths[i] = new Path(baseDir, "ifile_" + i + ".out"); FSDataOutputStream out = fs.create(paths[i]); //write data with RLE - IFile.Writer writer = new IFile.Writer(conf, out, keyClass, valClass, null, null, null, true); + IFile.Writer writer = new IFile.Writer(serializationContext.getKeySerialization(), + serializationContext.getValSerialization(), out, serializationContext.getKeyClass(), + serializationContext.getValueClass(), null, null, null, true); Map data = createData(); for (Map.Entry entry : data.entrySet()) { @@ -487,9 +495,8 @@ public List createInMemStreams() throws IOException { int numberOfStreams = Math.max(2, rnd.nextInt(10)); LOG.info("No of streams : " + numberOfStreams); - SerializationFactory serializationFactory = new SerializationFactory(conf); - Serializer keySerializer = serializationFactory.getSerializer(keyClass); - Serializer valueSerializer = serializationFactory.getSerializer(valClass); + Serializer keySerializer = serializationContext.getKeySerializer(); + Serializer valueSerializer = serializationContext.getValueSerializer(); LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); @@ -549,8 +556,8 @@ private InputContext createTezInputContext() { private Map createData() { Map map = new TreeMap(comparator); for (int j = 0; j < Math.max(10, rnd.nextInt(50)); j++) { - Writable key = createData(keyClass); - Writable value = createData(valClass); + Writable key = createData(serializationContext.getKeyClass()); + Writable value = createData(serializationContext.getValueClass()); map.put(key, value); //sortedDataMap.put(key, value); } @@ -558,7 +565,7 @@ private Map createData() { } - private Writable createData(Class c) { + private Writable createData(Class c) { if (c.getName().equalsIgnoreCase(BytesWritable.class.getName())) { return new BytesWritable(new BigInteger(256, rnd).toString().getBytes()); } else if (c.getName().equalsIgnoreCase(IntWritable.class.getName())) { diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java index c49a423883..42231e988e 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java +++ 
b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/readers/TestUnorderedKVReader.java @@ -23,6 +23,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.serializer.WritableSerialization; import org.apache.tez.common.counters.TaskCounter; import org.apache.tez.common.counters.TezCounter; import org.apache.tez.common.counters.TezCounters; @@ -96,7 +97,7 @@ private void setupReader() throws IOException, InterruptedException { createIFile(outputPath, 1); final LinkedList inputs = new LinkedList(); - LocalDiskFetchedInput realFetchedInput = new LocalDiskFetchedInput(0, rawLen, compLen, new + LocalDiskFetchedInput realFetchedInput = new LocalDiskFetchedInput(0, compLen, new InputAttemptIdentifier(0, 0), outputPath, defaultConf, new FetchedInputCallback() { @Override public void fetchComplete(FetchedInput fetchedInput) { @@ -131,8 +132,8 @@ public void freeResources(FetchedInput fetchedInput) { private void createIFile(Path path, int recordCount) throws IOException { FSDataOutputStream out = localFs.create(path); - IFile.Writer writer = - new IFile.Writer(defaultConf, out, Text.class, Text.class, null, null, null, true); + IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), + out, Text.class, Text.class, null, null, null, true); for (int i = 0; i < recordCount; i++) { writer.append(new Text("Key_" + i), new Text("Value_" + i)); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java index b0311541ac..efba8ea6a7 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestFetcher.java @@ -18,20 +18,23 @@ package org.apache.tez.runtime.library.common.shuffle; +import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyLong; -import static org.mockito.Matchers.anyString; -import static org.mockito.Matchers.eq; + import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; +import java.io.DataInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -46,10 +49,16 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.runtime.api.InputContext; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.common.InputAttemptIdentifier; +import org.apache.tez.runtime.library.common.shuffle.api.ShuffleHandlerError; +import org.apache.tez.runtime.library.common.shuffle.impl.ShuffleManager; +import 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader; import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord; +import org.apache.tez.runtime.library.testutils.RuntimeTestUtils; import org.junit.Assert; import org.junit.Test; import org.mockito.ArgumentCaptor; @@ -73,13 +82,14 @@ public void testLocalFetchModeSetting() throws Exception { final boolean DISABLE_LOCAL_FETCH = false; Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null, - ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST, + createMockInputContext(), null, conf, ENABLE_LOCAL_FETCH, HOST, PORT, false, true, false); builder.assignWork(HOST, PORT, 0, 1, Arrays.asList(srcAttempts)); Fetcher fetcher = spy(builder.build()); FetchResult fr = new FetchResult(HOST, PORT, 0, 1, Arrays.asList(srcAttempts)); - Fetcher.HostFetchResult hfr = new Fetcher.HostFetchResult(fr, srcAttempts, false); + Fetcher.HostFetchResult hfr = + new Fetcher.HostFetchResult(fr, InputAttemptFetchFailure.fromAttempts(srcAttempts), false); doReturn(hfr).when(fetcher).setupLocalDiskFetch(); doReturn(null).when(fetcher).doHttpFetch(); doNothing().when(fetcher).shutdown(); @@ -91,7 +101,7 @@ public void testLocalFetchModeSetting() throws Exception { // when enabled and hostname does not match use http fetch. builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null, - ApplicationId.newInstance(0, 1), -1, null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST, + createMockInputContext(), null, conf, ENABLE_LOCAL_FETCH, HOST, PORT, false, true, false); builder.assignWork(HOST + "_OTHER", PORT, 0, 1, Arrays.asList(srcAttempts)); fetcher = spy(builder.build()); @@ -107,7 +117,7 @@ public void testLocalFetchModeSetting() throws Exception { // when enabled and port does not match use http fetch. 
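// NOTE (editorial, not part of the patch): across this file the plain
// InputAttemptIdentifier in failure callbacks is wrapped in
// InputAttemptFetchFailure, which records how a fetch failed in addition to which
// input failed. From the factories and accessors exercised in these hunks:
//
//   InputAttemptFetchFailure f =
//       InputAttemptFetchFailure.fromCompositeAttemptLocalFetchFailure(attempt);
//   f.isLocalFetch();          // true for local-disk fetch failures
//   f.isDiskErrorAtSource();   // true when the shuffle handler reported DISK_ERROR_EXCEPTION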
builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null, - ApplicationId.newInstance(0, 1), -1, null, "fetcherTest", conf, ENABLE_LOCAL_FETCH, HOST, + createMockInputContext(), null, conf, ENABLE_LOCAL_FETCH, HOST, PORT, false, true, false); builder.assignWork(HOST, PORT + 1, 0, 1, Arrays.asList(srcAttempts)); fetcher = spy(builder.build()); @@ -124,7 +134,7 @@ public void testLocalFetchModeSetting() throws Exception { // When disabled use http fetch conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, false); builder = new Fetcher.FetcherBuilder(fetcherCallback, null, null, - ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, DISABLE_LOCAL_FETCH, HOST, + createMockInputContext(), null, conf, DISABLE_LOCAL_FETCH, HOST, PORT, false, true, false); builder.assignWork(HOST, PORT, 0, 1, Arrays.asList(srcAttempts)); fetcher = spy(builder.build()); @@ -151,14 +161,14 @@ public void testSetupLocalDiskFetch() throws Exception { }; final int FIRST_FAILED_ATTEMPT_IDX = 2; final int SECOND_FAILED_ATTEMPT_IDX = 4; - final int[] sucessfulAttempts = {0, 1, 3}; + final int[] successfulAttempts = {0, 1, 3}; TezConfiguration conf = new TezConfiguration(); conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, "true"); int partition = 42; FetcherCallback callback = mock(FetcherCallback.class); Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(callback, null, null, - ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, true, HOST, PORT, + createMockInputContext(), null, conf, true, HOST, PORT, false, true, true); ArrayList inputAttemptIdentifiers = new ArrayList<>(); for(CompositeInputAttemptIdentifier compositeInputAttemptIdentifier : srcAttempts) { @@ -184,7 +194,7 @@ public Path answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]); } - }).when(fetcher).getShuffleInputFileName(anyString(), anyString()); + }).when(fetcher).getShuffleInputFileName(anyString(), any()); doAnswer(new Answer() { @Override @@ -206,18 +216,24 @@ public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable { doNothing().when(fetcher).shutdown(); doNothing().when(callback).fetchSucceeded(anyString(), any(InputAttemptIdentifier.class), any(FetchedInput.class), anyLong(), anyLong(), anyLong()); - doNothing().when(callback).fetchFailed(anyString(), any(InputAttemptIdentifier.class), eq(false)); + doNothing().when(callback).fetchFailed(anyString(), any(InputAttemptFetchFailure.class), eq(false)); FetchResult fetchResult = fetcher.call(); verify(fetcher).setupLocalDiskFetch(); - // expect 3 sucesses and 2 failures - for (int i : sucessfulAttempts) { + // expect 3 successes and 2 failures + for (int i : successfulAttempts) { verifyFetchSucceeded(callback, srcAttempts[i], conf); } - verify(callback).fetchFailed(eq(HOST), eq(srcAttempts[FIRST_FAILED_ATTEMPT_IDX]), eq(false)); - verify(callback).fetchFailed(eq(HOST), eq(srcAttempts[SECOND_FAILED_ATTEMPT_IDX]), eq(false)); + verify(callback).fetchFailed(eq(HOST), + eq(InputAttemptFetchFailure + .fromCompositeAttemptLocalFetchFailure(srcAttempts[FIRST_FAILED_ATTEMPT_IDX])), + eq(false)); + verify(callback).fetchFailed(eq(HOST), + eq(InputAttemptFetchFailure + .fromCompositeAttemptLocalFetchFailure(srcAttempts[SECOND_FAILED_ATTEMPT_IDX])), + eq(false)); Assert.assertEquals("fetchResult host", fetchResult.getHost(), HOST); Assert.assertEquals("fetchResult partition", fetchResult.getPartition(), partition); @@ 
-244,10 +260,9 @@ protected void verifyFetchSucceeded(FetcherCallback callback, CompositeInputAtte LocalDiskFetchedInput f = capturedFetchedInput.getValue(); Assert.assertEquals("success callback filename", f.getInputFile().toString(), SHUFFLE_INPUT_FILE_PREFIX + pathComponent); - Assert.assertTrue("success callback fs", f.getLocalFS() instanceof LocalFileSystem); + Assert.assertTrue("success callback fs", f.getLocalFS() instanceof RawLocalFileSystem); Assert.assertEquals("success callback filesystem", f.getStartOffset(), p * 10); - Assert.assertEquals("success callback raw size", f.getActualSize(), p * 1000); - Assert.assertEquals("success callback compressed size", f.getCompressedSize(), p * 100); + Assert.assertEquals("success callback compressed size", f.getSize(), p * 100); Assert.assertEquals("success callback input id", f.getInputAttemptIdentifier(), srcAttempId.expand(0)); Assert.assertEquals("success callback type", f.getType(), FetchedInput.Type.DISK_DIRECT); } @@ -292,7 +307,7 @@ public void testInputAttemptIdentifierMap() { int partition = 42; FetcherCallback callback = mock(FetcherCallback.class); Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(callback, null, null, - ApplicationId.newInstance(0, 1), 1, null, "fetcherTest", conf, true, HOST, PORT, + createMockInputContext(), null, conf, true, HOST, PORT, false, true, false); builder.assignWork(HOST, PORT, partition, 1, Arrays.asList(srcAttempts)); Fetcher fetcher = spy(builder.build()); @@ -305,4 +320,41 @@ public void testInputAttemptIdentifierMap() { Assert.assertTrue(expectedSrcAttempts[count++].toString().compareTo(key) == 0); } } + + @Test + public void testShuffleHandlerDiskErrorUnordered() + throws Exception { + Configuration conf = new Configuration(); + + InputContext inputContext = mock(InputContext.class); + doReturn(new TezCounters()).when(inputContext).getCounters(); + doReturn("vertex").when(inputContext).getSourceVertexName(); + + Fetcher.FetcherBuilder builder = new Fetcher.FetcherBuilder(mock(ShuffleManager.class), null, + null, createMockInputContext(), null, conf, true, HOST, PORT, + false, true, false); + builder.assignWork(HOST, PORT, 0, 1, Arrays.asList(new InputAttemptIdentifier(0, 0))); + + Fetcher fetcher = builder.build(); + ShuffleHeader header = + new ShuffleHeader(ShuffleHandlerError.DISK_ERROR_EXCEPTION.toString(), -1, -1, -1); + DataInputStream input = RuntimeTestUtils.shuffleHeaderToDataInput(header); + + InputAttemptFetchFailure[] failures = + fetcher.fetchInputs(input, null, new InputAttemptIdentifier(0, 0)); + Assert.assertEquals(1, failures.length); + Assert.assertTrue(failures[0].isDiskErrorAtSource()); + Assert.assertFalse(failures[0].isLocalFetch()); + } + + private InputContext createMockInputContext() { + InputContext inputContext = mock(InputContext.class); + + doReturn(ApplicationId.newInstance(0, 1)).when(inputContext).getApplicationId(); + doReturn(1).when(inputContext).getDagIdentifier(); + doReturn("sourceVertex").when(inputContext).getSourceVertexName(); + doReturn("taskVertex").when(inputContext).getTaskVertexName(); + + return inputContext; + } } diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestShuffleUtils.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestShuffleUtils.java index 1d2d4280b5..3363cb6627 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestShuffleUtils.java +++ 
b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/TestShuffleUtils.java
@@ -3,6 +3,7 @@
 import com.google.common.collect.Lists;
 import com.google.protobuf.ByteString;
 
+import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -10,6 +11,8 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.CompressionInputStream;
+import org.apache.hadoop.io.compress.CompressionOutputStream;
+import org.apache.hadoop.io.compress.Compressor;
 import org.apache.hadoop.io.compress.Decompressor;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.tez.common.TezCommonUtils;
@@ -33,7 +36,7 @@
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
-import org.mockito.Matchers;
+import org.mockito.ArgumentMatchers;
 import org.slf4j.Logger;
 
 import java.io.ByteArrayInputStream;
@@ -41,6 +44,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.net.SocketTimeoutException;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.BitSet;
@@ -148,7 +152,7 @@ private Path createIndexFile(int numPartitions, boolean allEmptyPartitions) thro
       startOffset += partLen;
       spillRecord.putIndex(indexRecord, i);
     }
-    spillRecord.writeToFile(path, conf);
+    spillRecord.writeToFile(path, conf, FileSystem.getLocal(conf).getRaw());
     return path;
   }
 
@@ -253,28 +257,28 @@ public void testGenerateOnSpillEvent_With_All_EmptyPartitions() throws Exception
         outputContext, spillId, new TezSpillRecord(indexFile, conf), physicalOutputs, true,
         pathComponent, null, false, auxiliaryService, TezCommonUtils.newBestCompressionDeflater());
 
-    Assert.assertTrue(events.size() == 2); //one for VM
+    Assert.assertEquals(2, events.size()); //one for VM
     Assert.assertTrue(events.get(0) instanceof VertexManagerEvent);
     Assert.assertTrue(events.get(1) instanceof CompositeDataMovementEvent);
 
     CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(1);
-    Assert.assertTrue(cdme.getCount() == physicalOutputs);
-    Assert.assertTrue(cdme.getSourceIndexStart() == 0);
+    Assert.assertEquals(cdme.getCount(), physicalOutputs);
+    Assert.assertEquals(0, cdme.getSourceIndexStart());
 
     ShuffleUserPayloads.DataMovementEventPayloadProto dmeProto =
         ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(
             cdme.getUserPayload()));
 
     //spill details should be present
-    Assert.assertTrue(dmeProto.getSpillId() == 0);
+    Assert.assertEquals(0, dmeProto.getSpillId());
     Assert.assertTrue(dmeProto.hasLastEvent() && dmeProto.getLastEvent());
 
-    Assert.assertTrue(dmeProto.getPathComponent().equals(""));
+    Assert.assertEquals("", dmeProto.getPathComponent());
 
     byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(dmeProto
         .getEmptyPartitions());
     BitSet emptyPartitionsBitSet = TezUtilsInternal.fromByteArray(emptyPartitions);
-    Assert.assertTrue("emptyPartitionBitSet cardinality (expecting 10) = " + emptyPartitionsBitSet
-        .cardinality(), emptyPartitionsBitSet.cardinality() == 10);
+    Assert.assertEquals("emptyPartitionBitSet cardinality (expecting 10) = " + emptyPartitionsBitSet
+        .cardinality(), 10, emptyPartitionsBitSet.cardinality());
   }
 
@@ -285,9 +289,10 @@ public void testInternalErrorTranslation() throws Exception {
     when(mockCodecStream.read(any(byte[].class), anyInt(), anyInt()))
         .thenThrow(new InternalError(codecErrorMsg));
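
Two things are worth noting about this file's hunks. First, the assertTrue(a == b) to assertEquals conversions produce real failure messages instead of a bare AssertionError; JUnit's signature is assertEquals(expected, actual), and a few of the converted calls pass the arguments in reverse order, which does not change pass/fail behavior but does garble the message on failure. Second, testInternalErrorTranslation here (and testExceptionTranslation, added below) pin down how shuffle reads translate codec failures. A condensed sketch of that contract, assuming nothing about the real ShuffleUtils internals (guardedRead and the stream setup are illustrative):

    import java.io.IOException;
    import java.io.InputStream;

    public final class ShuffleReadSketch {
      // Decompressor bugs surface as InternalError or unchecked exceptions;
      // wrap them in IOException, keeping the original failure as the cause
      // and its message visible, as the tests assert.
      static int guardedRead(InputStream in, byte[] buf) throws IOException {
        try {
          return in.read(buf, 0, buf.length);
        } catch (InternalError | RuntimeException e) {
          throw new IOException("Error reading shuffle data: " + e.getMessage(), e);
        }
        // java.net.SocketTimeoutException is already an IOException and is not
        // caught above, so it propagates with its exact type intact, letting
        // callers distinguish a network timeout from a corrupt stream.
      }
    }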
Decompressor mockDecoder = mock(Decompressor.class); - CompressionCodec mockCodec = mock(CompressionCodec.class); + ConfigurableCodecForTest mockCodec = mock(ConfigurableCodecForTest.class); + when(mockCodec.getConf()).thenReturn(mock(Configuration.class)); when(mockCodec.createDecompressor()).thenReturn(mockDecoder); - when(mockCodec.createInputStream(any(InputStream.class), any(Decompressor.class))) + when(mockCodec.createInputStream(any(), any())) .thenReturn(mockCodecStream); byte[] header = new byte[] { (byte) 'T', (byte) 'I', (byte) 'F', (byte) 1}; try { @@ -300,6 +305,61 @@ public void testInternalErrorTranslation() throws Exception { } } + @Test + public void testExceptionTranslation() throws Exception { + String codecErrorMsg = "codec failure"; + CompressionInputStream mockCodecStream = mock(CompressionInputStream.class); + when(mockCodecStream.read(any(byte[].class), anyInt(), anyInt())) + .thenThrow(new IllegalArgumentException(codecErrorMsg)); + Decompressor mockDecoder = mock(Decompressor.class); + ConfigurableCodecForTest mockCodec = mock(ConfigurableCodecForTest.class); + when(mockCodec.getConf()).thenReturn(mock(Configuration.class)); + when(mockCodec.createDecompressor()).thenReturn(mockDecoder); + when(mockCodec.createInputStream(any(), any())) + .thenReturn(mockCodecStream); + byte[] header = new byte[] { (byte) 'T', (byte) 'I', (byte) 'F', (byte) 1}; + try { + ShuffleUtils.shuffleToMemory(new byte[1024], new ByteArrayInputStream(header), + 1024, 128, mockCodec, false, 0, mock(Logger.class), null); + Assert.fail("shuffle was supposed to throw!"); + } catch (IOException e) { + Assert.assertTrue(e.getCause() instanceof IllegalArgumentException); + Assert.assertTrue(e.getMessage().contains(codecErrorMsg)); + } + CompressionInputStream mockCodecStream1 = mock(CompressionInputStream.class); + when(mockCodecStream1.read(any(byte[].class), anyInt(), anyInt())) + .thenThrow(new SocketTimeoutException(codecErrorMsg)); + ConfigurableCodecForTest mockCodec1 = mock(ConfigurableCodecForTest.class); + when(mockCodec1.getConf()).thenReturn(mock(Configuration.class)); + when(mockCodec1.createDecompressor()).thenReturn(mockDecoder); + when(mockCodec1.createInputStream(any(), any())) + .thenReturn(mockCodecStream1); + try { + ShuffleUtils.shuffleToMemory(new byte[1024], new ByteArrayInputStream(header), + 1024, 128, mockCodec1, false, 0, mock(Logger.class), null); + Assert.fail("shuffle was supposed to throw!"); + } catch (IOException e) { + Assert.assertTrue(e instanceof SocketTimeoutException); + Assert.assertTrue(e.getMessage().contains(codecErrorMsg)); + } + CompressionInputStream mockCodecStream2 = mock(CompressionInputStream.class); + when(mockCodecStream2.read(any(byte[].class), anyInt(), anyInt())) + .thenThrow(new InternalError(codecErrorMsg)); + ConfigurableCodecForTest mockCodec2 = mock(ConfigurableCodecForTest.class); + when(mockCodec2.getConf()).thenReturn(mock(Configuration.class)); + when(mockCodec2.createDecompressor()).thenReturn(mockDecoder); + when(mockCodec2.createInputStream(any(), any())) + .thenReturn(mockCodecStream2); + try { + ShuffleUtils.shuffleToMemory(new byte[1024], new ByteArrayInputStream(header), + 1024, 128, mockCodec2, false, 0, mock(Logger.class), null); + Assert.fail("shuffle was supposed to throw!"); + } catch (IOException e) { + Assert.assertTrue(e.getCause() instanceof InternalError); + Assert.assertTrue(e.getMessage().contains(codecErrorMsg)); + } + } + @Test public void testShuffleToDiskChecksum() throws Exception { // verify sending a stream of 
zeroes without checksum validation @@ -334,13 +394,75 @@ public void testFetchStatsLogger() throws Exception { logger.logIndividualFetchComplete(10, 100, 1000, "testType", ident); } verify(activeLogger, times(0)).info(anyString()); - verify(aggregateLogger, times(1)).info(anyString(), Matchers.anyVararg()); + verify(aggregateLogger, times(1)).info(anyString(), ArgumentMatchers.any()); when(activeLogger.isInfoEnabled()).thenReturn(true); for (int i = 0; i < 1000; i++) { logger.logIndividualFetchComplete(10, 100, 1000, "testType", ident); } verify(activeLogger, times(1000)).info(anyString()); - verify(aggregateLogger, times(1)).info(anyString(), Matchers.anyVararg()); + verify(aggregateLogger, times(1)).info(anyString(), ArgumentMatchers.any()); + } + + /** + * A codec class which implements CompressionCodec, Configurable for testing purposes. + */ + public static class ConfigurableCodecForTest implements CompressionCodec, Configurable { + + @Override + public Compressor createCompressor() { + return null; + } + + @Override + public Decompressor createDecompressor() { + return null; + } + + @Override + public CompressionInputStream createInputStream(InputStream arg0) throws IOException { + return null; + } + + @Override + public CompressionInputStream createInputStream(InputStream arg0, Decompressor arg1) + throws IOException { + return null; + } + + @Override + public CompressionOutputStream createOutputStream(OutputStream arg0) throws IOException { + return null; + } + + @Override + public CompressionOutputStream createOutputStream(OutputStream arg0, Compressor arg1) + throws IOException { + return null; + } + + @Override + public Class getCompressorType() { + return null; + } + + @Override + public Class getDecompressorType() { + return null; + } + + @Override + public String getDefaultExtension() { + return null; + } + + @Override + public Configuration getConf() { + return null; + } + + @Override + public void setConf(Configuration arg0) { + } } } diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java index 683422b1a7..f4ddf590ef 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleInputEventHandlerImpl.java @@ -18,18 +18,22 @@ package org.apache.tez.runtime.library.common.shuffle.impl; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyString; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyBoolean; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.util.BitSet; import java.util.Collections; @@ -40,6 +44,7 @@ import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.security.token.Token; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezExecutors; @@ -54,15 +59,20 @@ import org.apache.tez.runtime.api.Event; import org.apache.tez.runtime.api.ExecutionContext; import org.apache.tez.runtime.api.InputContext; +import org.apache.tez.runtime.api.events.CompositeRoutedDataMovementEvent; import org.apache.tez.runtime.api.events.DataMovementEvent; import org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier; import org.apache.tez.runtime.library.common.InputAttemptIdentifier; +import org.apache.tez.runtime.library.common.shuffle.FetchedInput; import org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator; +import org.apache.tez.runtime.library.common.shuffle.MemoryFetchedInput; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; +import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads; import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto; import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.mockito.MockedStatic; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -189,6 +199,7 @@ private InputContext createInputContext() throws IOException { InputContext inputContext = mock(InputContext.class); doReturn(new TezCounters()).when(inputContext).getCounters(); doReturn("sourceVertex").when(inputContext).getSourceVertexName(); + doReturn("taskVertex").when(inputContext).getTaskVertexName(); doReturn(shuffleMetaData).when(inputContext) .getServiceProviderMetaData(conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT)); @@ -198,8 +209,8 @@ private InputContext createInputContext() throws IOException { @Override public ExecutorService answer(InvocationOnMock invocation) throws Throwable { return sharedExecutor.createExecutorService( - invocation.getArgumentAt(0, Integer.class), - invocation.getArgumentAt(1, String.class)); + invocation.getArgument(0, Integer.class), + invocation.getArgument(1, String.class)); } }); return inputContext; @@ -264,7 +275,7 @@ public void testPipelinedShuffleEvents() throws IOException { //0--> 1 with spill id 1 (attemptNum 1). This should report exception dme = createDataMovementEvent(true, 0, 1, 1, false, new BitSet(), 4, 1); handler.handleEvents(Collections.singletonList(dme)); - verify(inputContext).killSelf(any(Throwable.class), anyString()); + verify(inputContext).killSelf(any(), anyString()); } /** @@ -296,7 +307,7 @@ public void testPipelinedShuffleEvents_WithOutOfOrderAttempts() throws IOExcepti //Now send attemptNum 0. This should throw exception, because attempt #1 is already added dme = createDataMovementEvent(true, 0, 1, 0, false, new BitSet(), 4, 0); handler.handleEvents(Collections.singletonList(dme)); - verify(inputContext).killSelf(any(Throwable.class), anyString()); + verify(inputContext).killSelf(any(), anyString()); } /** @@ -337,7 +348,54 @@ public void testPipelinedShuffleEvents_WithEmptyPartitions() throws IOException //Now send attemptNum 1. 
This should throw exception, because attempt #1 is already added dme = createDataMovementEvent(true, 0, 1, 0, false, new BitSet(), 4, 1); handler.handleEvents(Collections.singletonList(dme)); - verify(inputContext).killSelf(any(Throwable.class), anyString()); + verify(inputContext).killSelf(any(), anyString()); + } + + /** + * Verify that data movement events with shuffle data are processed properly. + * + * @throws IOException + */ + @Test(timeout = 5000) + public void testDataMovementEventsWithShuffleData() throws IOException { + InputContext inputContext = mock(InputContext.class); + ShuffleManager shuffleManager = mock(ShuffleManager.class); + ShuffleManager compositeFetchShuffleManager = mock(ShuffleManager.class); + FetchedInputAllocator inputAllocator = mock(FetchedInputAllocator.class); + MemoryFetchedInput memoryFetchedInput = mock(MemoryFetchedInput.class); + + when(memoryFetchedInput.getType()).thenReturn(FetchedInput.Type.MEMORY); + when(memoryFetchedInput.getBytes()).thenReturn("data".getBytes()); + when(inputAllocator.allocate(anyLong(), anyLong(), any(InputAttemptIdentifier.class))) + .thenReturn(memoryFetchedInput); + + ShuffleInputEventHandlerImpl eventHandler = new ShuffleInputEventHandlerImpl(inputContext, + shuffleManager, inputAllocator, null, true, 4, false); + + ShuffleInputEventHandlerImpl compositeFetchEventHandler = new ShuffleInputEventHandlerImpl(inputContext, + compositeFetchShuffleManager, inputAllocator, null, true, 4, true); + + DataMovementEvent dataMovementEvent = (DataMovementEvent) createDataMovementEventWithShuffleData(false); + CompositeRoutedDataMovementEvent compositeRoutedDataMovementEvent = + (CompositeRoutedDataMovementEvent) createDataMovementEventWithShuffleData(true); + + List eventListWithDme = new LinkedList<>(); + eventListWithDme.add(dataMovementEvent); + eventListWithDme.add(compositeRoutedDataMovementEvent); + + try (MockedStatic shuffleUtils = mockStatic(ShuffleUtils.class)) { + shuffleUtils.when(() -> ShuffleUtils + .shuffleToMemory(any(byte[].class), any(InputStream.class), anyInt(), anyInt(), any(CompressionCodec.class), + anyBoolean(), anyInt(), any(), any(InputAttemptIdentifier.class))) + .thenAnswer((Answer) invocation -> null); + eventHandler.handleEvents(eventListWithDme); + compositeFetchEventHandler.handleEvents(eventListWithDme); + + verify(shuffleManager, times(2)) + .addCompletedInputWithData(any(InputAttemptIdentifier.class), any(FetchedInput.class)); + verify(compositeFetchShuffleManager, times(2)) + .addCompletedInputWithData(any(InputAttemptIdentifier.class), any(FetchedInput.class)); + } } private Event createDataMovementEvent(boolean addSpillDetails, int srcIdx, int targetIdx, @@ -395,4 +453,19 @@ private ByteString createEmptyPartitionByteString(int... emptyPartitions) throws return emptyPartitionsBytesString; } + private Event createDataMovementEventWithShuffleData(boolean isComposite) { + DataMovementEventPayloadProto.Builder builder = DataMovementEventPayloadProto.newBuilder(); + builder.setHost(HOST); + builder.setPort(PORT); + builder.setPathComponent(PATH_COMPONENT); + ShuffleUserPayloads.DataProto.Builder dataProtoBuilder = ShuffleUserPayloads.DataProto.newBuilder() + .setData(ByteString.copyFromUtf8("data")); + builder.setData(dataProtoBuilder); + + Event dme = isComposite? 
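
The testDataMovementEventsWithShuffleData test above relies on Mockito's MockedStatic (available via mockito-inline, or mockito-core 5.x by default) to stub the static ShuffleUtils.shuffleToMemory call. The mechanism in isolation, as a self-contained sketch (HeaderParser is a stand-in class, not a Tez type):

    import static org.mockito.ArgumentMatchers.any;
    import static org.mockito.Mockito.mockStatic;

    import org.mockito.MockedStatic;

    public class MockedStaticExample {
      static class HeaderParser {
        static String detect(byte[] header) { return "REAL"; }
      }

      public void example() {
        // The stub is active only inside the try-with-resources scope; the
        // real static method is restored on close, so later tests in the
        // same JVM are unaffected.
        try (MockedStatic<HeaderParser> mocked = mockStatic(HeaderParser.class)) {
          mocked.when(() -> HeaderParser.detect(any())).thenReturn("STUBBED");
          assert "STUBBED".equals(HeaderParser.detect(new byte[0]));
        }
        assert "REAL".equals(HeaderParser.detect(new byte[0]));
      }
    }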
+ CompositeRoutedDataMovementEvent.create(0, 1, 1, 0, builder.build().toByteString().asReadOnlyByteBuffer()): + DataMovementEvent.create(0, 1, 0, builder.build().toByteString().asReadOnlyByteBuffer()); + return dme; + } + } diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleManager.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleManager.java index f361dc767b..ba854b9c14 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleManager.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestShuffleManager.java @@ -20,8 +20,10 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyString; +import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; @@ -51,23 +53,25 @@ import org.apache.tez.common.security.JobTokenIdentifier; import org.apache.tez.common.security.JobTokenSecretManager; import org.apache.tez.dag.api.TezConfiguration; -import org.apache.tez.dag.api.TezConstants; import org.apache.tez.runtime.api.Event; import org.apache.tez.runtime.api.ExecutionContext; import org.apache.tez.runtime.api.InputContext; import org.apache.tez.runtime.api.events.DataMovementEvent; +import org.apache.tez.runtime.api.events.InputReadErrorEvent; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.common.InputAttemptIdentifier; import org.apache.tez.runtime.library.common.shuffle.FetchedInput; import org.apache.tez.runtime.library.common.shuffle.FetchedInputAllocator; import org.apache.tez.runtime.library.common.shuffle.Fetcher; +import org.apache.tez.runtime.library.common.shuffle.InputAttemptFetchFailure; import org.apache.tez.runtime.library.common.shuffle.FetchResult; import org.apache.tez.runtime.library.common.shuffle.InputHost; -import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.DataMovementEventPayloadProto; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.ArgumentCaptor; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -165,6 +169,7 @@ private InputContext createInputContext() throws IOException { InputContext inputContext = mock(InputContext.class); doReturn(new TezCounters()).when(inputContext).getCounters(); doReturn("sourceVertex").when(inputContext).getSourceVertexName(); + doReturn("taskVertex").when(inputContext).getTaskVertexName(); doReturn(shuffleMetaData).when(inputContext) .getServiceProviderMetaData(conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT)); @@ -174,8 +179,8 @@ private InputContext createInputContext() throws IOException { @Override public ExecutorService answer(InvocationOnMock invocation) throws Throwable { return sharedExecutor.createExecutorService( - invocation.getArgumentAt(0, Integer.class), - invocation.getArgumentAt(1, String.class)); + invocation.getArgument(0, Integer.class), + 
invocation.getArgument(1, String.class)); } }); return inputContext; @@ -193,6 +198,84 @@ public void testUseSharedExecutor() throws Exception { verify(inputContext).createTezFrameworkExecutorService(anyInt(), anyString()); } + @Test (timeout = 20000) + public void testProgressWithEmptyPendingHosts() throws Exception { + InputContext inputContext = createInputContext(); + final ShuffleManager shuffleManager = spy(createShuffleManager(inputContext, 1)); + Thread schedulerGetHostThread = new Thread(new Runnable() { + @Override + public void run() { + try { + shuffleManager.run(); + } catch (Exception e) { + e.printStackTrace(); + } + } + }); + schedulerGetHostThread.start(); + Thread.currentThread().sleep(1000 * 3 + 1000); + schedulerGetHostThread.interrupt(); + verify(inputContext, atLeast(3)).notifyProgress(); + } + + @Test (timeout = 200000) + public void testFetchFailed() throws Exception { + InputContext inputContext = createInputContext(); + final ShuffleManager shuffleManager = spy(createShuffleManager(inputContext, 1)); + Thread schedulerGetHostThread = new Thread(new Runnable() { + @Override + public void run() { + try { + shuffleManager.run(); + } catch (Exception e) { + e.printStackTrace(); + } + } + }); + InputAttemptFetchFailure inputAttemptFetchFailure = + new InputAttemptFetchFailure(new InputAttemptIdentifier(1, 1)); + + schedulerGetHostThread.start(); + Thread.sleep(1000); + shuffleManager.fetchFailed("host1", inputAttemptFetchFailure, false); + Thread.sleep(1000); + + ArgumentCaptor captor = ArgumentCaptor.forClass(List.class); + verify(inputContext, times(1)) + .sendEvents(captor.capture()); + Assert.assertEquals("Size was: " + captor.getAllValues().size(), + captor.getAllValues().size(), 1); + List capturedList = captor.getAllValues().get(0); + Assert.assertEquals("Size was: " + capturedList.size(), + capturedList.size(), 1); + InputReadErrorEvent inputEvent = (InputReadErrorEvent)capturedList.get(0); + Assert.assertEquals("Number of failures was: " + inputEvent.getNumFailures(), + inputEvent.getNumFailures(), 1); + + shuffleManager.fetchFailed("host1", inputAttemptFetchFailure, false); + shuffleManager.fetchFailed("host1", inputAttemptFetchFailure, false); + + Thread.sleep(1000); + verify(inputContext, times(1)).sendEvents(any()); + + // Wait more than five seconds for the batch to go out + Thread.sleep(5000); + captor = ArgumentCaptor.forClass(List.class); + verify(inputContext, times(2)) + .sendEvents(captor.capture()); + Assert.assertEquals("Size was: " + captor.getAllValues().size(), + captor.getAllValues().size(), 2); + capturedList = captor.getAllValues().get(1); + Assert.assertEquals("Size was: " + capturedList.size(), + capturedList.size(), 1); + inputEvent = (InputReadErrorEvent)capturedList.get(0); + Assert.assertEquals("Number of failures was: " + inputEvent.getNumFailures(), + inputEvent.getNumFailures(), 2); + + + schedulerGetHostThread.interrupt(); + } + private ShuffleManagerForTest createShuffleManager( InputContext inputContext, int expectedNumOfPhysicalInputs) throws IOException { @@ -201,6 +284,8 @@ private ShuffleManagerForTest createShuffleManager( doReturn(outDirs).when(inputContext).getWorkDirs(); conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, inputContext.getWorkDirs()); + // 5 seconds + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_BATCH_WAIT, 5000); DataOutputBuffer out = new DataOutputBuffer(); Token token = new Token(new JobTokenIdentifier(), @@ -277,7 +362,17 @@ boolean isFetcherExecutorShutdown() { static class 
TestFetchedInput extends FetchedInput { public TestFetchedInput(InputAttemptIdentifier inputAttemptIdentifier) { - super(Type.MEMORY, -1, -1, inputAttemptIdentifier, null); + super(inputAttemptIdentifier, null); + } + + @Override + public long getSize() { + return -1; + } + + @Override + public Type getType() { + return Type.MEMORY; } @Override diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java index 1b63b17149..01faa5df7a 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/impl/TestSimpleFetchedInputAllocator.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; +import java.io.File; import java.io.IOException; import java.util.UUID; @@ -38,7 +39,7 @@ public class TestSimpleFetchedInputAllocator { @Test(timeout = 5000) public void testInMemAllocation() throws IOException { - String localDirs = "/tmp/" + this.getClass().getName(); + File localDirs = new File(System.getProperty("test.build.data", "/tmp"), this.getClass().getName()); Configuration conf = new Configuration(); long jvmMax = Runtime.getRuntime().maxMemory(); @@ -47,7 +48,7 @@ public void testInMemAllocation() throws IOException { float bufferPercent = 0.1f; conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, bufferPercent); conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 1.0f); - conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDirs); + conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDirs.getAbsolutePath()); long inMemThreshold = (long) (bufferPercent * jvmMax); LOG.info("InMemThreshold: " + inMemThreshold); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/DummyCompressionCodec.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/DummyCompressionCodec.java new file mode 100644 index 0000000000..530b9a3732 --- /dev/null +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/DummyCompressionCodec.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tez.runtime.library.common.shuffle.orderedgrouped; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; + +import com.google.common.annotations.VisibleForTesting; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import static org.mockito.Mockito.mock; + +/** + * A dummy codec. It passes everything to underlying stream + */ +public class DummyCompressionCodec implements CompressionCodec, Configurable { + @VisibleForTesting + int createInputStreamCalled = 0; + private Configuration conf; + + @Override + public CompressionOutputStream createOutputStream(OutputStream out) throws IOException { + return new DummyCompressionOutputStream(out); + } + + @Override + public CompressionOutputStream createOutputStream(OutputStream out, Compressor compressor) throws IOException { + return new DummyCompressionOutputStream(out); + } + + @Override + public Class getCompressorType() { + return Compressor.class; + } + + @Override + public Compressor createCompressor() { + return mock(Compressor.class); + } + + @Override + public CompressionInputStream createInputStream(InputStream in) throws IOException { + return new DummyCompressionInputStream(in); + } + + @Override + public CompressionInputStream createInputStream(InputStream in, Decompressor decompressor) throws IOException { + createInputStreamCalled += 1; + return new DummyCompressionInputStream(in); + } + + @Override + public Class getDecompressorType() { + return Decompressor.class; + } + + @Override + public Decompressor createDecompressor() { + return mock(Decompressor.class); + } + + @Override + public String getDefaultExtension() { + return null; + } + + class DummyCompressionOutputStream extends CompressionOutputStream { + + protected DummyCompressionOutputStream(OutputStream out) { + super(out); + } + + @Override + public void write(int b) throws IOException { + out.write(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + } + + @Override + public void finish() throws IOException { + //no-op + } + + @Override + public void resetState() throws IOException { + //no-op + } + } + + class DummyCompressionInputStream extends CompressionInputStream { + + protected DummyCompressionInputStream(InputStream in) throws IOException { + super(in); + } + + @Override + public int read() throws IOException { + return in.read(); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + return in.read(b, off, len); + } + + @Override + public void resetState() throws IOException { + //no-op + } + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return conf; + } +} diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java index ef371c200c..068c8f7b02 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java +++ 
b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestFetcher.java @@ -19,15 +19,15 @@ package org.apache.tez.runtime.library.common.shuffle.orderedgrouped; import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyBoolean; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyLong; -import static org.mockito.Matchers.anyString; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyBoolean; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.spy; @@ -42,6 +42,7 @@ import java.net.URL; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedList; @@ -59,7 +60,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.tez.common.counters.TezCounters; import org.apache.tez.common.security.JobTokenSecretManager; @@ -70,7 +73,10 @@ import org.apache.tez.runtime.library.common.security.SecureShuffleUtils; import org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord; import org.apache.tez.runtime.library.exceptions.FetcherReadTimeoutException; +import org.apache.tez.runtime.library.testutils.RuntimeTestUtils; +import org.apache.tez.runtime.library.common.shuffle.InputAttemptFetchFailure; import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; +import org.apache.tez.runtime.library.common.shuffle.api.ShuffleHandlerError; import org.junit.Assert; import org.junit.Test; import org.mockito.ArgumentCaptor; @@ -84,7 +90,7 @@ public class TestFetcher { public static final String HOST = "localhost"; public static final int PORT = 65; public static final int DAG_ID = 1; - public static final String APP_ID = "application_1234_1"; + public static final ApplicationId APP_ID = ApplicationId.newInstance(0, 1); private TezCounters tezCounters = new TezCounters(); private TezCounter ioErrsCounter = tezCounters.findCounter(ShuffleScheduler.SHUFFLE_ERR_GRP_NAME, @@ -115,10 +121,6 @@ public void testInputsReturnedOnConnectionException() throws Exception { Shuffle shuffle = mock(Shuffle.class); - InputContext inputContext = mock(InputContext.class); - doReturn(new TezCounters()).when(inputContext).getCounters(); - doReturn("src vertex").when(inputContext).getSourceVertexName(); - MapHost mapHost = new MapHost(HOST, PORT, 0, 1); InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(0, 0, "attempt"); mapHost.addKnownMap(inputAttemptIdentifier); @@ -126,11 +128,10 @@ public void testInputsReturnedOnConnectionException() throws Exception { doReturn(mapsForHost).when(scheduler).getMapsForHost(mapHost); FetcherOrderedGrouped fetcher = - new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, - null, conf, false, HOST, PORT, "src vertex", mapHost, 
ioErrsCounter, - wrongLengthErrsCounter, badIdErrsCounter, - wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, - false, false, true, false); + new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, + getRawFs(conf), false, HOST, PORT, mapHost, ioErrsCounter, + wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, + wrongReduceErrsCounter, false, false, true, false, createMockInputContext()); fetcher.call(); verify(scheduler).getMapsForHost(mapHost); @@ -146,19 +147,14 @@ public void testLocalFetchModeSetting1() throws Exception { MergeManager merger = mock(MergeManager.class); Shuffle shuffle = mock(Shuffle.class); - InputContext inputContext = mock(InputContext.class); - doReturn(new TezCounters()).when(inputContext).getCounters(); - doReturn("src vertex").when(inputContext).getSourceVertexName(); - final boolean ENABLE_LOCAL_FETCH = true; final boolean DISABLE_LOCAL_FETCH = false; MapHost mapHost = new MapHost(HOST, PORT, 0, 1); FetcherOrderedGrouped fetcher = - new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, - null, conf, ENABLE_LOCAL_FETCH, HOST, PORT, "src vertex", mapHost, ioErrsCounter, - wrongLengthErrsCounter, badIdErrsCounter, - wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, - false, false, true, false); + new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, + getRawFs(conf), ENABLE_LOCAL_FETCH, HOST, PORT, mapHost, ioErrsCounter, + wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, + wrongReduceErrsCounter, false, false, true, false, createMockInputContext()); // when local mode is enabled and host and port matches use local fetch FetcherOrderedGrouped spyFetcher = spy(fetcher); @@ -167,47 +163,44 @@ public void testLocalFetchModeSetting1() throws Exception { spyFetcher.fetchNext(); verify(spyFetcher, times(1)).setupLocalDiskFetch(mapHost); - verify(spyFetcher, never()).copyFromHost(any(MapHost.class)); + verify(spyFetcher, never()).copyFromHost(any()); // if hostname does not match use http mapHost = new MapHost(HOST + "_OTHER", PORT, 0, 1); fetcher = - new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, - null, conf, ENABLE_LOCAL_FETCH, HOST, PORT, "src vertex", mapHost, ioErrsCounter, - wrongLengthErrsCounter, badIdErrsCounter, - wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, - false, false, true, false); + new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, + getRawFs(conf), ENABLE_LOCAL_FETCH, HOST, PORT, mapHost, ioErrsCounter, + wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, + wrongReduceErrsCounter, false, false, true, false, createMockInputContext()); spyFetcher = spy(fetcher); doNothing().when(spyFetcher).setupLocalDiskFetch(mapHost); spyFetcher.fetchNext(); - verify(spyFetcher, never()).setupLocalDiskFetch(any(MapHost.class)); + verify(spyFetcher, never()).setupLocalDiskFetch(any()); verify(spyFetcher, times(1)).copyFromHost(mapHost); // if port does not match use http mapHost = new MapHost(HOST, PORT + 1, 0, 1); fetcher = - new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, - null, conf, ENABLE_LOCAL_FETCH, HOST, PORT, "src vertex", mapHost, ioErrsCounter, - wrongLengthErrsCounter, badIdErrsCounter, - wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, 
DAG_ID, - false, false, true, false); + new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, conf, + getRawFs(conf), ENABLE_LOCAL_FETCH, HOST, PORT, mapHost, ioErrsCounter, + wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, + wrongReduceErrsCounter, false, false, true, false, createMockInputContext()); spyFetcher = spy(fetcher); doNothing().when(spyFetcher).setupLocalDiskFetch(mapHost); spyFetcher.fetchNext(); - verify(spyFetcher, never()).setupLocalDiskFetch(any(MapHost.class)); + verify(spyFetcher, never()).setupLocalDiskFetch(any()); verify(spyFetcher, times(1)).copyFromHost(mapHost); //if local fetch is not enabled mapHost = new MapHost(HOST, PORT, 0, 1); - fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, - null, conf, DISABLE_LOCAL_FETCH, HOST, PORT, "src vertex", mapHost, ioErrsCounter, - wrongLengthErrsCounter, badIdErrsCounter, - wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, - false, false, true, false); + fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, null, + conf, getRawFs(conf), DISABLE_LOCAL_FETCH, HOST, PORT, mapHost, ioErrsCounter, + wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, + wrongReduceErrsCounter, false, false, true, false, createMockInputContext()); spyFetcher = spy(fetcher); doNothing().when(spyFetcher).setupLocalDiskFetch(mapHost); @@ -223,15 +216,12 @@ public void testSetupLocalDiskFetch() throws Exception { ShuffleScheduler scheduler = mock(ShuffleScheduler.class); MergeManager merger = mock(MergeManager.class); Shuffle shuffle = mock(Shuffle.class); - InputContext inputContext = mock(InputContext.class); - when(inputContext.getCounters()).thenReturn(new TezCounters()); - when(inputContext.getSourceVertexName()).thenReturn(""); MapHost host = new MapHost(HOST, PORT, 1, 1); - FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, - null, conf, true, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, - wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, - false, false, true, false); + FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, + null, false, 0, null, conf, getRawFs(conf), true, HOST, PORT, host, + ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, + connectionErrsCounter, wrongReduceErrsCounter, false, false, true, false, createMockInputContext()); FetcherOrderedGrouped spyFetcher = spy(fetcher); @@ -244,7 +234,7 @@ public void testSetupLocalDiskFetch() throws Exception { ); final int FIRST_FAILED_ATTEMPT_IDX = 2; final int SECOND_FAILED_ATTEMPT_IDX = 4; - final int[] sucessfulAttemptsIndexes = { 0, 1, 3 }; + final int[] successfulAttemptsIndexes = { 0, 1, 3 }; doReturn(srcAttempts).when(scheduler).getMapsForHost(host); @@ -276,8 +266,8 @@ public MapOutput answer(InvocationOnMock invocation) throws Throwable { return mapOutput; } }).when(spyFetcher) - .getMapOutputForDirectDiskFetch(any(InputAttemptIdentifier.class), any(Path.class), - any(TezIndexRecord.class)); + .getMapOutputForDirectDiskFetch(any(), any(), + any()); doAnswer(new Answer() { @Override @@ -285,7 +275,7 @@ public Path answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]); } - 
}).when(spyFetcher).getShuffleInputFileName(anyString(), anyString()); + }).when(spyFetcher).getShuffleInputFileName(any(), any()); for (int i = 0; i < host.getPartitionCount(); i++) { doAnswer(new Answer() { @@ -299,9 +289,9 @@ public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable { throw new IOException("failing to simulate failure case"); } // match with params for copySucceeded below. - return new TezIndexRecord(p * 10, p * 1000, p * 100); + return new TezIndexRecord(p * 10, (p+1) * 1000, (p+2) * 100); } - }).when(spyFetcher).getIndexRecord(anyString(), eq(host.getPartitionId() + i)); + }).when(spyFetcher).getIndexRecord(any(), eq(host.getPartitionId() + i)); } doNothing().when(scheduler).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), @@ -314,34 +304,107 @@ public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable { spyFetcher.setupLocalDiskFetch(host); // should have exactly 3 success and 1 failure. - for (int i : sucessfulAttemptsIndexes) { + for (int i : successfulAttemptsIndexes) { for (int j = 0; j < host.getPartitionCount(); j++) { verifyCopySucceeded(scheduler, host, srcAttempts, i, j); } } - verify(scheduler).copyFailed(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true); - verify(scheduler).copyFailed(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true); + verify(scheduler).copyFailed( + eq(InputAttemptFetchFailure.fromLocalFetchFailure(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0))), + eq(host), eq(true), eq(false)); + verify(scheduler).copyFailed( + eq(InputAttemptFetchFailure.fromLocalFetchFailure(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(0))), + eq(host), eq(true), eq(false)); verify(spyFetcher).putBackRemainingMapOutputs(host); verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX)); verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX)); } + @Test (timeout = 5000) + public void testSetupLocalDiskFetchEmptyPartitions() throws Exception { + Configuration conf = new TezConfiguration(); + ShuffleScheduler scheduler = mock(ShuffleScheduler.class); + MergeManager merger = mock(MergeManager.class); + Shuffle shuffle = mock(Shuffle.class); + + MapHost host = new MapHost(HOST, PORT, 1, 1); + FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, + null, false, 0, null, conf, getRawFs(conf), true, HOST, PORT, host, + ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, + connectionErrsCounter, wrongReduceErrsCounter, false, false, true, false, createMockInputContext()); + FetcherOrderedGrouped spyFetcher = spy(fetcher); + + final List srcAttempts = Arrays.asList( + new CompositeInputAttemptIdentifier(0, 1, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_0", 1), + new CompositeInputAttemptIdentifier(1, 2, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_1", 1), + new CompositeInputAttemptIdentifier(2, 3, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_2", 1), + new CompositeInputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3", 1), + new CompositeInputAttemptIdentifier(4, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_4", 1) + ); + + doReturn(srcAttempts).when(scheduler).getMapsForHost(host); + + final ConcurrentMap pathToIdentifierMap = new ConcurrentHashMap(); + for (CompositeInputAttemptIdentifier srcAttempt : srcAttempts) { + for (int i = 0; i < 
srcAttempt.getInputIdentifierCount(); i++) { + ShuffleScheduler.PathPartition pathPartition = new ShuffleScheduler.PathPartition(srcAttempt.getPathComponent(), host.getPartitionId() + i); + pathToIdentifierMap.put(pathPartition, srcAttempt.expand(i)); + } + } + doAnswer(new Answer() { + @Override + public InputAttemptIdentifier answer(InvocationOnMock invocation) throws Throwable { + Object[] args = invocation.getArguments(); + String path = (String) args[0]; + int reduceId = (int) args[1]; + return pathToIdentifierMap.get(new ShuffleScheduler.PathPartition(path, reduceId)); + } + }).when(scheduler) + .getIdentifierForFetchedOutput(any(String.class), any(int.class)); + + doAnswer(new Answer() { + @Override + public Path answer(InvocationOnMock invocation) throws Throwable { + Object[] args = invocation.getArguments(); + return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]); + } + }).when(spyFetcher).getShuffleInputFileName(any(), anyString()); + + for (int i = 0; i < host.getPartitionCount(); i++) { + doAnswer(new Answer() { + @Override + public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable { + Object[] args = invocation.getArguments(); + String pathComponent = (String) args[0]; + int len = pathComponent.length(); + long p = Long.valueOf(pathComponent.substring(len - 1, len)); + // match with params for copySucceeded below. + return new TezIndexRecord(p * 10, 0, 0); + } + }).when(spyFetcher).getIndexRecord(anyString(), eq(host.getPartitionId() + i)); + } + + doNothing().when(scheduler).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), + anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean()); + spyFetcher.setupLocalDiskFetch(host); + verify(scheduler, times(0)).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), + anyLong(), anyLong(), anyLong(), any(MapOutput.class), anyBoolean()); + verify(spyFetcher).putBackRemainingMapOutputs(host); + } + @Test(timeout = 5000) public void testSetupLocalDiskFetchAutoReduce() throws Exception { Configuration conf = new TezConfiguration(); ShuffleScheduler scheduler = mock(ShuffleScheduler.class); MergeManager merger = mock(MergeManager.class); Shuffle shuffle = mock(Shuffle.class); - InputContext inputContext = mock(InputContext.class); - when(inputContext.getCounters()).thenReturn(new TezCounters()); - when(inputContext.getSourceVertexName()).thenReturn(""); MapHost host = new MapHost(HOST, PORT, 1, 2); - FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, - null, conf, true, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, - wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, - false, false, true, false); + FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, + null, false, 0, null, conf, getRawFs(conf), true, HOST, PORT, host, + ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, + connectionErrsCounter, wrongReduceErrsCounter, false, false, true, false, createMockInputContext()); FetcherOrderedGrouped spyFetcher = spy(fetcher); @@ -354,7 +417,7 @@ public void testSetupLocalDiskFetchAutoReduce() throws Exception { ); final int FIRST_FAILED_ATTEMPT_IDX = 2; final int SECOND_FAILED_ATTEMPT_IDX = 4; - final int[] sucessfulAttemptsIndexes = { 0, 1, 3 }; + final int[] successfulAttemptsIndexes = { 0, 1, 3 }; doReturn(srcAttempts).when(scheduler).getMapsForHost(host); final ConcurrentMap 
pathToIdentifierMap @@ -396,7 +459,7 @@ public Path answer(InvocationOnMock invocation) throws Throwable { Object[] args = invocation.getArguments(); return new Path(SHUFFLE_INPUT_FILE_PREFIX + args[0]); } - }).when(spyFetcher).getShuffleInputFileName(anyString(), anyString()); + }).when(spyFetcher).getShuffleInputFileName(any(), any()); for (int i = 0; i < host.getPartitionCount(); i++) { doAnswer(new Answer() { @@ -412,9 +475,9 @@ public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable { throw new IOException("Thowing exception to simulate failure case"); } // match with params for copySucceeded below. - return new TezIndexRecord(p * 10, p * 1000, p * 100); + return new TezIndexRecord(p * 10, (p + 1) * 1000, (p + 2) * 100); } - }).when(spyFetcher).getIndexRecord(anyString(), eq(host.getPartitionId() + i)); + }).when(spyFetcher).getIndexRecord(any(), eq(host.getPartitionId() + i)); } doNothing().when(scheduler).copySucceeded(any(InputAttemptIdentifier.class), any(MapHost.class), @@ -431,15 +494,23 @@ public TezIndexRecord answer(InvocationOnMock invocation) throws Throwable { spyFetcher.setupLocalDiskFetch(host); // should have exactly 3 success and 1 failure. - for (int i : sucessfulAttemptsIndexes) { + for (int i : successfulAttemptsIndexes) { for (int j = 0; j < host.getPartitionCount(); j++) { verifyCopySucceeded(scheduler, host, srcAttempts, i, j); } } - verify(scheduler).copyFailed(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true); - verify(scheduler).copyFailed(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(1), host, true, false, true); - verify(scheduler).copyFailed(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(0), host, true, false, true); - verify(scheduler).copyFailed(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(1), host, true, false, true); + verify(scheduler).copyFailed( + eq(InputAttemptFetchFailure.fromLocalFetchFailure(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(0))), + eq(host), eq(true), eq(false)); + verify(scheduler).copyFailed( + eq(InputAttemptFetchFailure.fromLocalFetchFailure(srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX).expand(1))), + eq(host), eq(true), eq(false)); + verify(scheduler).copyFailed(eq( + InputAttemptFetchFailure.fromLocalFetchFailure(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(0))), + eq(host), eq(true), eq(false)); + verify(scheduler).copyFailed(eq( + InputAttemptFetchFailure.fromLocalFetchFailure(srcAttempts.get(SECOND_FAILED_ATTEMPT_IDX).expand(1))), + eq(host), eq(true), eq(false)); verify(spyFetcher).putBackRemainingMapOutputs(host); verify(scheduler).putBackKnownMapOutput(host, srcAttempts.get(FIRST_FAILED_ATTEMPT_IDX)); @@ -455,8 +526,8 @@ private void verifyCopySucceeded(ShuffleScheduler scheduler, MapHost host, InputAttemptIdentifier srcAttemptToMatch = srcAttempts.get((int) p).expand(j); String filenameToMatch = SHUFFLE_INPUT_FILE_PREFIX + srcAttemptToMatch.getPathComponent(); ArgumentCaptor captureMapOutput = ArgumentCaptor.forClass(MapOutput.class); - verify(scheduler).copySucceeded(eq(srcAttemptToMatch), eq(host), eq(p * 100), - eq(p * 1000), anyLong(), captureMapOutput.capture(), anyBoolean()); + verify(scheduler).copySucceeded(eq(srcAttemptToMatch), eq(host), eq((p+2) * 100), + eq((p+1) * 1000), anyLong(), captureMapOutput.capture(), anyBoolean()); // cannot use the equals of MapOutput as it compares id which is private. 
so doing it manually MapOutput m = captureMapOutput.getAllValues().get(0); @@ -505,17 +576,12 @@ public void testWithRetry() throws Exception { MergeManager merger = mock(MergeManager.class); Shuffle shuffle = mock(Shuffle.class); - InputContext inputContext = mock(InputContext.class); - when(inputContext.getCounters()).thenReturn(new TezCounters()); - when(inputContext.getSourceVertexName()).thenReturn(""); - when(inputContext.getApplicationId()).thenReturn(ApplicationId.newInstance(0, 1)); - HttpConnectionParams httpConnectionParams = ShuffleUtils.getHttpConnectionParams(conf); final MapHost host = new MapHost(HOST, PORT, 1, 1); - FetcherOrderedGrouped mockFetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, - null, conf, false, HOST, PORT, "src vertex", host, ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, - wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, - false, false, true, false); + FetcherOrderedGrouped mockFetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, + null, false, 0, null, conf, getRawFs(conf), false, HOST, PORT, host, + ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, + connectionErrsCounter, wrongReduceErrsCounter, false, false, true, false, createMockInputContext()); final FetcherOrderedGrouped fetcher = spy(mockFetcher); @@ -525,7 +591,7 @@ public void testWithRetry() throws Exception { new InputAttemptIdentifier(3, 4, InputAttemptIdentifier.PATH_PREFIX + "pathComponent_3") ); doReturn(srcAttempts).when(scheduler).getMapsForHost(host); - doReturn(true).when(fetcher).setupConnection(any(MapHost.class), any(Collection.class)); + doReturn(true).when(fetcher).setupConnection(any(), any()); URL url = ShuffleUtils.constructInputURL("http://" + HOST + ":" + PORT + "/mapOutput?job=job_123&&reduce=1&map=", srcAttempts, false); fetcher.httpConnection = new FakeHttpConnection(url, null, "", null); @@ -539,18 +605,16 @@ public MapOutput answer(InvocationOnMock invocation) throws Throwable { doReturn(args[0]).when(mapOutput).getAttemptIdentifier(); return mapOutput; } - }).when(merger).reserve(any(InputAttemptIdentifier.class), anyInt(), anyInt(), anyInt()); + }).when(merger).reserve(any(), anyInt(), anyInt(), anyInt()); //Create read timeout when reading data doAnswer(new Answer() { @Override public Void answer(InvocationOnMock invocation) throws Throwable { - // Emulate host down for 4 seconds. 
- Thread.sleep(4000); doReturn(false).when(fetcher).setupConnection(any(MapHost.class), any(Collection.class)); - // Throw IOException when fetcher tries to connect again to the same node + // Simulate read timeout by throwing proper exception throw new FetcherReadTimeoutException("creating fetcher socket read timeout exception"); } - }).when(fetcher).copyMapOutput(any(MapHost.class), any(DataInputStream.class), any(InputAttemptIdentifier.class)); + }).when(fetcher).copyMapOutput(any(), any(), any()); try { fetcher.copyFromHost(host); @@ -560,8 +624,8 @@ public MapOutput answer(InvocationOnMock invocation) throws Throwable { //setup connection should be called twice (1 for connect and another for retry) verify(fetcher, times(2)).setupConnection(any(MapHost.class), any(Collection.class)); //since copyMapOutput consistently fails, it should call copyFailed once - verify(scheduler, times(1)).copyFailed(any(InputAttemptIdentifier.class), any(MapHost.class), - anyBoolean(), anyBoolean(), anyBoolean()); + verify(scheduler, times(1)).copyFailed(any(InputAttemptFetchFailure.class), any(MapHost.class), + anyBoolean(), anyBoolean()); verify(fetcher, times(1)).putBackRemainingMapOutputs(any(MapHost.class)); verify(scheduler, times(3)).putBackKnownMapOutput(any(MapHost.class), @@ -590,23 +654,16 @@ public void testAsyncWithException() throws Exception { MergeManager merger = mock(MergeManager.class); Shuffle shuffle = mock(Shuffle.class); - TezCounters counters = new TezCounters(); - InputContext inputContext = mock(InputContext.class); - when(inputContext.getCounters()).thenReturn(counters); - when(inputContext.getSourceVertexName()).thenReturn(""); - JobTokenSecretManager jobMgr = mock(JobTokenSecretManager.class); doReturn(new byte[10]).when(jobMgr).computeHash(any(byte[].class)); HttpConnectionParams httpConnectionParams = ShuffleUtils.getHttpConnectionParams(conf); final MapHost host = new MapHost(HOST, PORT, 1, 1); FetcherOrderedGrouped mockFetcher = - new FetcherOrderedGrouped(httpConnectionParams, scheduler, merger, shuffle, jobMgr, - false, 0, - null, conf, false, HOST, PORT, "src vertex", host, ioErrsCounter, - wrongLengthErrsCounter, badIdErrsCounter, - wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, - true, false, true, false); + new FetcherOrderedGrouped(httpConnectionParams, scheduler, merger, shuffle, jobMgr, false, + 0, null, conf, getRawFs(conf), false, HOST, PORT, host, ioErrsCounter, + wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, connectionErrsCounter, + wrongReduceErrsCounter, true, false, true, false, createMockInputContext()); final FetcherOrderedGrouped fetcher = spy(mockFetcher); fetcher.remaining = new LinkedHashMap(); final List srcAttempts = Arrays.asList( @@ -668,12 +725,10 @@ public void testInputAttemptIdentifierMap() { MergeManager merger = mock(MergeManager.class); Shuffle shuffle = mock(Shuffle.class); MapHost mapHost = new MapHost(HOST, PORT, 0, 1); - FetcherOrderedGrouped fetcher = - new FetcherOrderedGrouped(null, scheduler, merger, shuffle, null, false, 0, - null, conf, false, HOST, PORT, "src vertex", mapHost, ioErrsCounter, - wrongLengthErrsCounter, badIdErrsCounter, - wrongMapErrsCounter, connectionErrsCounter, wrongReduceErrsCounter, APP_ID, DAG_ID, - false, false, true, false); + FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, scheduler, merger, shuffle, + null, false, 0, null, conf, getRawFs(conf), false, HOST, PORT, mapHost, + ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, 
wrongMapErrsCounter, + connectionErrsCounter, wrongReduceErrsCounter, false, false, true, false, createMockInputContext()); fetcher.populateRemainingMap(new LinkedList(Arrays.asList(srcAttempts))); Assert.assertEquals(expectedSrcAttempts.length, fetcher.remaining.size()); Iterator> iterator = fetcher.remaining.entrySet().iterator(); @@ -683,4 +738,52 @@ public void testInputAttemptIdentifierMap() { Assert.assertTrue(expectedSrcAttempts[count++].toString().compareTo(key) == 0); } } + + @Test + public void testShuffleHandlerDiskErrorOrdered() + throws Exception { + MapHost mapHost = new MapHost(HOST, PORT, 0, 1); + InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(0, 0, "attempt"); + + FetcherOrderedGrouped fetcher = new FetcherOrderedGrouped(null, null, null, null, null, false, + 0, null, new TezConfiguration(), null, false, HOST, PORT, mapHost, + ioErrsCounter, wrongLengthErrsCounter, badIdErrsCounter, wrongMapErrsCounter, + connectionErrsCounter, wrongReduceErrsCounter, false, false, true, false, createMockInputContext()); + fetcher.remaining = new HashMap(); + + ShuffleHeader header = + new ShuffleHeader(ShuffleHandlerError.DISK_ERROR_EXCEPTION.toString(), -1, -1, -1); + DataInputStream input = RuntimeTestUtils.shuffleHeaderToDataInput(header); + + // copyMapOutput is used for remote fetch, this time it returns a fetch failure, which is fatal + // and should be treated as a local fetch failure + InputAttemptFetchFailure[] failures = + fetcher.copyMapOutput(mapHost, input, inputAttemptIdentifier); + + Assert.assertEquals(1, failures.length); + Assert.assertTrue(failures[0].isDiskErrorAtSource()); + Assert.assertFalse(failures[0].isLocalFetch()); + } + + private RawLocalFileSystem getRawFs(Configuration conf) { + try { + return (RawLocalFileSystem) FileSystem.getLocal(conf).getRaw(); + } catch (IOException e) { + // getting the local FS should never fail in tests; rethrow as unchecked + throw new RuntimeException(e); + } + } + + + private InputContext createMockInputContext() { + InputContext inputContext = mock(InputContext.class); + + doReturn(APP_ID).when(inputContext).getApplicationId(); + doReturn(DAG_ID).when(inputContext).getDagIdentifier(); + doReturn(new TezCounters()).when(inputContext).getCounters(); + doReturn("src vertex").when(inputContext).getSourceVertexName(); + doReturn("task_Vertex").when(inputContext).getTaskVertexName(); + + return inputContext; + } } diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestMergeManager.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestMergeManager.java index 5737578c70..dde067beb8 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestMergeManager.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestMergeManager.java @@ -21,9 +21,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; import static org.mockito.Mockito.atLeastOnce; -import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; @@ -38,8 +36,7 @@ import com.google.common.collect.Sets; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; +import org.apache.hadoop.io.serializer.WritableSerialization;
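The WritableSerialization import above supports a change that recurs through the rest of this patch: IFile.Writer no longer resolves serializers from a Configuration but takes them as explicit constructor arguments. A minimal before/after sketch of the call shape, using only names that appear in the surrounding hunks:

    // before: the writer derived key/value serialization from the Configuration
    IFile.Writer writer =
        new IFile.Writer(conf, fsdos, IntWritable.class, IntWritable.class, null, null, null);

    // after: callers pass the serializations explicitly, decoupling IFile from conf lookups
    IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(),
        fsdos, IntWritable.class, IntWritable.class, null, null, null);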
import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -257,6 +254,66 @@ public void testIntermediateMemoryMergeAccounting() throws Exception { assertEquals(data1.length + data2.length, mergeManager.getUsedMemory()); } + @Test + public void testDiskMergeWithCodec() throws Throwable { + Configuration conf = new TezConfiguration(defaultConf); + conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName()); + conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName()); + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 3); + + Path localDir = new Path(workDir, "local"); + localFs.mkdirs(localDir); + + conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString()); + + LocalDirAllocator localDirAllocator = + new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); + InputContext inputContext = createMockInputContext(UUID.randomUUID().toString()); + + // Create a dummy compression codec; the test later verifies that it was actually used. + DummyCompressionCodec dummyCodec = new DummyCompressionCodec(); + dummyCodec.setConf(conf); + + MergeManager mergeManager = + new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, + mock(ExceptionReporter.class), 2000, dummyCodec, false, -1); + mergeManager.configureAndStart(); + + assertEquals(0, mergeManager.getUsedMemory()); + assertEquals(0, mergeManager.getCommitMemory()); + + InputAttemptIdentifier inputAttemptIdentifier1 = new InputAttemptIdentifier(0, 0); + InputAttemptIdentifier inputAttemptIdentifier2 = new InputAttemptIdentifier(1, 0); + InputAttemptIdentifier inputAttemptIdentifier3 = new InputAttemptIdentifier(2, 0); + InputAttemptIdentifier inputAttemptIdentifier4 = new InputAttemptIdentifier(3, 0); + byte[] data1 = generateDataBySizeAndGetBytes(conf, 500, inputAttemptIdentifier1); + byte[] data2 = generateDataBySizeAndGetBytes(conf, 500, inputAttemptIdentifier2); + byte[] data3 = generateDataBySizeAndGetBytes(conf, 500, inputAttemptIdentifier3); + byte[] data4 = generateDataBySizeAndGetBytes(conf, 500, inputAttemptIdentifier4); + + MapOutput mo1 = mergeManager.reserve(inputAttemptIdentifier1, data1.length, data1.length, 0); + MapOutput mo2 = mergeManager.reserve(inputAttemptIdentifier2, data2.length, data2.length, 0); + MapOutput mo3 = mergeManager.reserve(inputAttemptIdentifier3, data3.length, data3.length, 0); + MapOutput mo4 = mergeManager.reserve(inputAttemptIdentifier4, data4.length, data4.length, 0); + + mo1.getDisk().write(data1); + mo1.getDisk().flush(); + mo2.getDisk().write(data2); + mo2.getDisk().flush(); + mo3.getDisk().write(data3); + mo3.getDisk().flush(); + mo4.getDisk().write(data4); + mo4.getDisk().flush(); + + mo1.commit(); + mo2.commit(); + mo3.commit(); + mo4.commit(); + + mergeManager.close(true); + Assert.assertTrue(dummyCodec.createInputStreamCalled > 0); + } + @Test(timeout = 60000l) public void testIntermediateMemoryMerge() throws Throwable { Configuration conf = new TezConfiguration(defaultConf); @@ -573,8 +630,8 @@ public void testIntermediateMemoryMerge() throws Throwable { private byte[] generateDataBySize(Configuration conf, int rawLen, InputAttemptIdentifier inputAttemptIdentifier) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); FSDataOutputStream fsdos = new FSDataOutputStream(baos, null); - IFile.Writer writer = - new IFile.Writer(conf, fsdos, IntWritable.class, IntWritable.class, null, null, null); + IFile.Writer writer = new IFile.Writer(new
WritableSerialization(), new WritableSerialization(), + fsdos, IntWritable.class, IntWritable.class, null, null, null); int i = 0; while(true) { writer.append(new IntWritable(i), new IntWritable(i)); @@ -592,11 +649,35 @@ private byte[] generateDataBySize(Configuration conf, int rawLen, InputAttemptId return data; } - private byte[] generateData(Configuration conf, int numEntries, InputAttemptIdentifier inputAttemptIdentifier) throws IOException { + private byte[] generateDataBySizeAndGetBytes(Configuration conf, int rawLen, + InputAttemptIdentifier inputAttemptIdentifier) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + FSDataOutputStream fsdos = new FSDataOutputStream(baos, null); + IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), + fsdos, IntWritable.class, IntWritable.class, null, null, null); + int i = 0; + while(true) { + writer.append(new IntWritable(i), new IntWritable(i)); + i++; + if (writer.getRawLength() > rawLen) { + break; + } + } + writer.close(); + int compressedLength = (int)writer.getCompressedLength(); + int rawLength = (int)writer.getRawLength(); + byte[] data = new byte[rawLength]; + ShuffleUtils.shuffleToMemory(data, new ByteArrayInputStream(baos.toByteArray()), + rawLength, compressedLength, null, false, 0, LOG, inputAttemptIdentifier); + return baos.toByteArray(); + } + + private byte[] generateData(Configuration conf, int numEntries, + InputAttemptIdentifier inputAttemptIdentifier) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); FSDataOutputStream fsdos = new FSDataOutputStream(baos, null); - IFile.Writer writer = - new IFile.Writer(conf, fsdos, IntWritable.class, IntWritable.class, null, null, null); + IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), + fsdos, IntWritable.class, IntWritable.class, null, null, null); for (int i = 0; i < numEntries; ++i) { writer.append(new IntWritable(i), new IntWritable(i)); } @@ -757,9 +838,9 @@ public void testOnDiskMergerFilenames() throws IOException, InterruptedException assertEquals(m2Path.toString().length(), m3Path.toString().length()); // Ensure the filenames are used correctly - based on the first file given to the merger. 
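The indexOf-to-lastIndexOf switch just below matters because the prefix comparison should strip only the final extension; a dot anywhere earlier in the path would make indexOf truncate too early. A tiny illustration (the path here is hypothetical):

    String p = "/tmp/app.dir/file.out.merged";
    p.substring(0, p.indexOf("."));      // "/tmp/app" - cut at the first dot anywhere in the path
    p.substring(0, p.lastIndexOf('.'));  // "/tmp/app.dir/file.out" - only the extension removed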
- String m1Prefix = m1Path.toString().substring(0, m1Path.toString().indexOf(".")); - String m2Prefix = m2Path.toString().substring(0, m2Path.toString().indexOf(".")); - String m3Prefix = m3Path.toString().substring(0, m3Path.toString().indexOf(".")); + String m1Prefix = m1Path.toString().substring(0, m1Path.toString().lastIndexOf('.')); + String m2Prefix = m2Path.toString().substring(0, m2Path.toString().lastIndexOf('.')); + String m3Prefix = m3Path.toString().substring(0, m3Path.toString().lastIndexOf('.')); assertEquals(m1Prefix, m2Prefix); assertNotEquals(m1Prefix, m3Prefix); @@ -934,7 +1015,8 @@ private SrcFileInfo createFile(Configuration conf, FileSystem fs, Path path, int for (int i = 0; i < numPartitions; i++) { long pos = outStream.getPos(); IFile.Writer writer = - new IFile.Writer(conf, outStream, IntWritable.class, IntWritable.class, null, null, null); + new IFile.Writer(new WritableSerialization(), new WritableSerialization(), outStream, + IntWritable.class, IntWritable.class, null, null, null); for (int j = 0; j < numKeysPerPartition; j++) { writer.append(new IntWritable(currentKey), new IntWritable(currentKey)); currentKey++; diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffle.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffle.java index a28b1fa1a5..590affc078 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffle.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffle.java @@ -16,10 +16,10 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyString; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -144,6 +144,7 @@ private InputContext createTezInputContext() throws IOException { InputContext inputContext = mock(InputContext.class); doReturn(applicationId).when(inputContext).getApplicationId(); doReturn("sourceVertex").when(inputContext).getSourceVertexName(); + doReturn("taskVertex").when(inputContext).getTaskVertexName(); when(inputContext.getCounters()).thenReturn(new TezCounters()); ExecutionContext executionContext = new ExecutionContextImpl("localhost"); doReturn(executionContext).when(inputContext).getExecutionContext(); @@ -159,8 +160,8 @@ private InputContext createTezInputContext() throws IOException { @Override public ExecutorService answer(InvocationOnMock invocation) throws Throwable { return sharedExecutor.createExecutorService( - invocation.getArgumentAt(0, Integer.class), - invocation.getArgumentAt(1, String.class)); + invocation.getArgument(0, Integer.class), + invocation.getArgument(1, String.class)); } }); return inputContext; diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleInputEventHandlerOrderedGrouped.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleInputEventHandlerOrderedGrouped.java index cf4ff6a826..8da4adcf48 100644 --- 
a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleInputEventHandlerOrderedGrouped.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleInputEventHandlerOrderedGrouped.java @@ -37,11 +37,11 @@ import java.util.concurrent.ExecutorService; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyString; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; @@ -95,8 +95,8 @@ private InputContext createTezInputContext() throws IOException { @Override public ExecutorService answer(InvocationOnMock invocation) throws Throwable { return sharedExecutor.createExecutorService( - invocation.getArgumentAt(0, Integer.class), - invocation.getArgumentAt(1, String.class)); + invocation.getArgument(0, Integer.class), + invocation.getArgument(1, String.class)); } }); return inputContext; @@ -272,7 +272,7 @@ public void testPiplinedShuffleEvents_WithOutofOrderAttempts() throws IOExceptio handler.handleEvents(Collections.singletonList(dme2)); // task should issue kill request - verify(scheduler, times(1)).killSelf(any(IOException.class), any(String.class)); + verify(scheduler, times(1)).killSelf(any(), any()); } @Test (timeout = 5000) @@ -307,7 +307,7 @@ public void testPipelinedShuffle_WithObsoleteEvents() throws IOException, Interr handler.handleEvents(events); // task should issue kill request, as inputs are scheduled for download already. 
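The killSelf matcher relaxations around this point, together with the import swaps from org.mockito.Matchers to org.mockito.Mockito throughout this patch, track the Mockito 2 API: matcher methods moved to ArgumentMatchers (re-exported through Mockito), getArgumentAt became getArgument, and a bare any() suffices where the compiler can infer the argument type. A sketch of the equivalences, assuming Mockito 2.x:

    // Mockito 1.x style being removed:
    //   import static org.mockito.Matchers.any;
    //   verify(scheduler, times(1)).killSelf(any(IOException.class), any(String.class));
    //   invocation.getArgumentAt(0, Integer.class);

    // Mockito 2.x style used after this patch:
    import static org.mockito.Mockito.any;
    verify(scheduler, times(1)).killSelf(any(), any());
    invocation.getArgument(0, Integer.class);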
- verify(scheduler, times(1)).killSelf(any(IOException.class), any(String.class)); + verify(scheduler, times(1)).killSelf(any(), any()); } @Test(timeout = 5000) @@ -348,8 +348,8 @@ public void testAllPartitionsEmpty() throws IOException { events.add(dme); handler.handleEvents(events); InputAttemptIdentifier expectedIdentifier = new InputAttemptIdentifier(targetIdx, 0); - verify(scheduler).copySucceeded(eq(expectedIdentifier), any(MapHost.class), eq(0l), - eq(0l), eq(0l), any(MapOutput.class), eq(true)); + verify(scheduler).copySucceeded(eq(expectedIdentifier), any(), eq(0L), + eq(0L), eq(0L), any(), eq(true)); } @Test(timeout = 5000) @@ -362,8 +362,8 @@ public void testCurrentPartitionEmpty() throws IOException { events.add(dme); handler.handleEvents(events); InputAttemptIdentifier expectedIdentifier = new InputAttemptIdentifier(targetIdx, 0); - verify(scheduler).copySucceeded(eq(expectedIdentifier), any(MapHost.class), eq(0l), - eq(0l), eq(0l), any(MapOutput.class), eq(true)); + verify(scheduler).copySucceeded(eq(expectedIdentifier), any(), eq(0L), + eq(0L), eq(0L), any(), eq(true)); } @Test(timeout = 5000) diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleScheduler.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleScheduler.java index c61391ccfb..9df9aaf69b 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleScheduler.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/shuffle/orderedgrouped/TestShuffleScheduler.java @@ -16,9 +16,9 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyInt; +import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; @@ -31,10 +31,11 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; @@ -54,7 +55,9 @@ import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.common.CompositeInputAttemptIdentifier; import org.apache.tez.runtime.library.common.InputAttemptIdentifier; +import org.apache.tez.runtime.library.common.shuffle.InputAttemptFetchFailure; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.invocation.InvocationOnMock; @@ -88,7 +91,6 @@ public void testNumParallelScheduledFetchers() throws IOException, InterruptedEx new ShuffleSchedulerForTest(inputContext, conf, numInputs, shuffle, mergeManager, mergeManager, System.currentTimeMillis(), null, false, 0, "srcName", true); - Future executorFuture = null; ExecutorService executor = Executors.newFixedThreadPool(1); try { @@ -109,10 +111,9 @@ public Void call() throws Exception { scheduler.addKnownMapOutput("host" + i, 10000, 1, 
inputAttemptIdentifier); identifiers[i] = inputAttemptIdentifier; } - - // Sleep for a bit to allow the copies to be scheduled. - Thread.sleep(2000l); - assertEquals(10, scheduler.numFetchersCreated.get()); + // wait for all the copies to be scheduled with timeout + scheduler.latch.await(2000, TimeUnit.MILLISECONDS); + assertEquals(0, scheduler.latch.getCount()); } finally { scheduler.close(); @@ -248,8 +249,8 @@ public void _testReducerHealth_1(Configuration conf) throws IOException { for (int i = 100; i < 199; i++) { InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_"); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier), + new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), false, true); } @@ -257,9 +258,8 @@ public void _testReducerHealth_1(Configuration conf) throws IOException { new InputAttemptIdentifier(200, 0, "attempt_"); //Should fail here and report exception as reducer is not healthy - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (200 % - totalProducerNodes), - 10000, 200, 1), false, true, false); + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier), + new MapHost("host" + (200 % totalProducerNodes), 10000, 200, 1), false, true); int minFailurePerHost = conf.getInt( TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MIN_FAILURES_PER_HOST, @@ -267,10 +267,10 @@ public void _testReducerHealth_1(Configuration conf) throws IOException { if (minFailurePerHost <= 4) { //As per test threshold. Should fail & retrigger shuffle - verify(shuffle, atLeast(0)).reportException(any(Throwable.class)); + verify(shuffle, atLeast(0)).reportException(any()); } else if (minFailurePerHost > 100) { //host failure is so high that this would not retrigger shuffle re-execution - verify(shuffle, atLeast(1)).reportException(any(Throwable.class)); + verify(shuffle, atLeast(1)).reportException(any()); } } @@ -330,24 +330,23 @@ public void testReducerHealth_2() throws IOException, InterruptedException { for (int i = 190; i < 200; i++) { InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_"); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier), + new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), false, true); } //Shuffle has not stalled. so no issues. 
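A structural note on the scheduler test change earlier in this file: the fixed Thread.sleep(2000l) pause in testNumParallelScheduledFetchers was replaced by a CountDownLatch that ShuffleSchedulerForTest decrements each time it constructs a fetcher, so the test waits only as long as needed and fails deterministically on timeout. The pattern, in a generic sketch:

    CountDownLatch latch = new CountDownLatch(10);   // one count per expected fetcher

    // producer side (constructFetcherForHost): signal each creation
    latch.countDown();

    // test side: bounded wait instead of a fixed sleep
    latch.await(2000, TimeUnit.MILLISECONDS);
    assertEquals(0, latch.getCount());               // all fetchers were created in time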
- verify(scheduler.reporter, times(0)).reportException(any(Throwable.class)); + verify(scheduler.reporter, times(0)).reportException(any()); //stall shuffle scheduler.lastProgressTime = System.currentTimeMillis() - 250000; InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(190, 0, "attempt_"); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + - (190 % totalProducerNodes), - 10000, 190, 1), false, true, false); + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier), + new MapHost("host" + (190 % totalProducerNodes), 10000, 190, 1), false, true); //Even when it is stalled, need (320 - 300 = 20) * 3 = 60 failures - verify(scheduler.reporter, times(0)).reportException(any(Throwable.class)); + verify(scheduler.reporter, times(0)).reportException(any()); assertEquals(11, scheduler.failedShufflesSinceLastCompletion); @@ -355,38 +354,41 @@ public void testReducerHealth_2() throws IOException, InterruptedException { for (int i = 190; i < 200; i++) { inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_"); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); + InputAttemptFetchFailure failure = InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), + 10000, i, 1), false, true); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), + 10000, i, 1), false, true); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), + 10000, i, 1), false, true); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), + 10000, i, 1), false, true); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), + 10000, i, 1), false, true); } assertEquals(61, scheduler.failedShufflesSinceLastCompletion); assertEquals(10, scheduler.remainingMaps.get()); - verify(shuffle, atLeast(0)).reportException(any(Throwable.class)); + verify(shuffle, atLeast(0)).reportException(any()); //fail another 30 for (int i = 110; i < 120; i++) { inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_"); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); + InputAttemptFetchFailure failure = + InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), 10000, 
i, 1), + false, true); } // Should fail now due to fetcherHealthy. (stall has already happened and // these are the only pending tasks) - verify(shuffle, atLeast(1)).reportException(any(Throwable.class)); + verify(shuffle, atLeast(1)).reportException(any()); } @@ -432,8 +434,8 @@ public void testReducerHealth_3() throws IOException { //1 fails (last fetch) InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(319, 0, "attempt_"); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (319 % totalProducerNodes), - 10000, 319, 1), false, true, false); + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier), + new MapHost("host" + (319 % totalProducerNodes), 10000, 319, 1), false, true); //stall the shuffle scheduler.lastProgressTime = System.currentTimeMillis() - 1000000; @@ -441,18 +443,16 @@ public void testReducerHealth_3() throws IOException { assertEquals(scheduler.remainingMaps.get(), 1); //Retry for 3 more times - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (319 % - totalProducerNodes), - 10000, 319, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (319 % - totalProducerNodes), - 10000, 310, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (319 % - totalProducerNodes), - 10000, 310, 1), false, true, false); + InputAttemptFetchFailure failure = InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier); + scheduler.copyFailed(failure, new MapHost("host" + (319 % totalProducerNodes), 10000, 319, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (319 % totalProducerNodes), 10000, 310, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (319 % totalProducerNodes), 10000, 310, 1), + false, true); // failedShufflesSinceLastCompletion has crossed the limits. 
Throw error - verify(shuffle, times(0)).reportException(any(Throwable.class)); + verify(shuffle, times(0)).reportException(any()); } @Test(timeout = 60000) @@ -487,15 +487,15 @@ public void testReducerHealth_4() throws IOException { for (int i = 0; i < 64; i++) { InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_"); + InputAttemptFetchFailure failure = + InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % - totalProducerNodes), 10000, i, 1), false, true, false); - - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % - totalProducerNodes), 10000, i, 1), false, true, false); - - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % - totalProducerNodes), 10000, i, 1), false, true, false); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), + false, true); MapOutput mapOutput = MapOutput .createMemoryMapOutput(inputAttemptIdentifier, mock(FetchedInputAllocatorOrderedGrouped.class), @@ -518,8 +518,8 @@ public void testReducerHealth_4() throws IOException { //1 fails (last fetch) InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(319, 0, "attempt_"); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (319 % totalProducerNodes), - 10000, 319, 1), false, true, false); + scheduler.copyFailed(new InputAttemptFetchFailure(inputAttemptIdentifier), + new MapHost("host" + (319 % totalProducerNodes), 10000, 319, 1), false, true); //stall the shuffle (but within limits) scheduler.lastProgressTime = System.currentTimeMillis() - 100000; @@ -527,27 +527,24 @@ public void testReducerHealth_4() throws IOException { assertEquals(scheduler.remainingMaps.get(), 1); //Retry for 3 more times - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (319 % - totalProducerNodes), - 10000, 319, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (319 % - totalProducerNodes), - 10000, 319, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (319 % - totalProducerNodes), - 10000, 319, 1), false, true, false); + InputAttemptFetchFailure failure = InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier); + scheduler.copyFailed(failure, new MapHost("host" + (319 % totalProducerNodes), 10000, 319, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (319 % totalProducerNodes), 10000, 319, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (319 % totalProducerNodes), 10000, 319, 1), + false, true); // failedShufflesSinceLastCompletion has crossed the limits. 20% of other nodes had failures as // well. However, it has failed only in one host. So this should proceed // until AM decides to restart the producer. 
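Across these hunks, ShuffleScheduler.copyFailed now takes an InputAttemptFetchFailure instead of a bare InputAttemptIdentifier plus a trailing isLocalFetch boolean, so the failure mode travels with the attempt. Judging from the call sites in this patch, the translation looks like:

    // before: failure flavour encoded in positional booleans
    scheduler.copyFailed(inputAttemptIdentifier, host, readError, connectError, false);

    // after: the wrapper carries the mode; note one fewer trailing argument
    scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier),
        host, readError, connectError);

    // local disk fetch failures use a dedicated factory, queried via isLocalFetch()
    scheduler.copyFailed(
        InputAttemptFetchFailure.fromLocalFetchFailure(inputAttemptIdentifier),
        host, true, false);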
- verify(shuffle, times(0)).reportException(any(Throwable.class)); + verify(shuffle, times(0)).reportException(any()); //stall the shuffle (but within limits) scheduler.lastProgressTime = System.currentTimeMillis() - 300000; - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (319 % - totalProducerNodes), - 10000, 319, 1), false, true, false); - verify(shuffle, times(1)).reportException(any(Throwable.class)); + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier), + new MapHost("host" + (319 % totalProducerNodes), 10000, 319, 1), false, true); + verify(shuffle, times(1)).reportException(any()); } @@ -592,8 +589,9 @@ public void testReducerHealth_5() throws IOException { //1 fails (last fetch) InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(318, 0, "attempt_"); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (318 % totalProducerNodes), - 10000, 318, 1), false, true, false); + InputAttemptFetchFailure failure = new InputAttemptFetchFailure(inputAttemptIdentifier); + scheduler.copyFailed(failure, new MapHost("host" + (318 % totalProducerNodes), + 10000, 318, 1), false, true); //stall the shuffle scheduler.lastProgressTime = System.currentTimeMillis() - 1000000; @@ -601,18 +599,15 @@ public void testReducerHealth_5() throws IOException { assertEquals(scheduler.remainingMaps.get(), 1); //Retry for 3 more times - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (318 % - totalProducerNodes), - 10000, 318, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (318 % - totalProducerNodes), - 10000, 318, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (318 % - totalProducerNodes), - 10000, 318, 1), false, true, false); + scheduler.copyFailed(failure, new MapHost("host" + (318 % totalProducerNodes), 10000, 318, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (318 % totalProducerNodes), 10000, 318, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (318 % totalProducerNodes), 10000, 318, 1), + false, true); //Shuffle has not received the events completely. So do not bail out yet. 
- verify(shuffle, times(0)).reportException(any(Throwable.class)); + verify(shuffle, times(0)).reportException(any()); } @@ -672,24 +667,24 @@ public void _testReducerHealth_6(Configuration conf) throws IOException { for (int i = 10; i < 15; i++) { InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_"); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier), + new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), false, true); } assertTrue(scheduler.failureCounts.size() >= 5); assertEquals(scheduler.remainingMaps.get(), 310); //Do not bail out (number of failures is just 5) - verify(scheduler.reporter, times(0)).reportException(any(Throwable.class)); + verify(scheduler.reporter, times(0)).reportException(any()); //5 fetches fail repeatedly for (int i = 10; i < 15; i++) { InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_"); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, new MapHost("host" + (i % totalProducerNodes), - 10000, i, 1), false, true, false); + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier), + new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), false, true); + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier), + new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), false, true); } boolean checkFailedFetchSinceLastCompletion = conf.getBoolean @@ -701,10 +696,10 @@ public void _testReducerHealth_6(Configuration conf) throws IOException { // Now bail out, as Shuffle has crossed the // failedShufflesSinceLastCompletion limits. (even // though reducerHeathly is - verify(shuffle, atLeast(1)).reportException(any(Throwable.class)); + verify(shuffle, atLeast(1)).reportException(any()); } else { //Do not bail out yet. 
- verify(shuffle, atLeast(0)).reportException(any(Throwable.class)); + verify(shuffle, atLeast(0)).reportException(any()); } } @@ -749,21 +744,18 @@ public void testReducerHealth_7() throws IOException { for (int i = 100; i < 199; i++) { InputAttemptIdentifier inputAttemptIdentifier = new InputAttemptIdentifier(i, 0, "attempt_"); - scheduler.copyFailed(inputAttemptIdentifier, - new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), - false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, - new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), - false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, - new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), - false, true, false); - scheduler.copyFailed(inputAttemptIdentifier, - new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), - false, true, false); + InputAttemptFetchFailure failure = InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), + false, true); + scheduler.copyFailed(failure, new MapHost("host" + (i % totalProducerNodes), 10000, i, 1), + false, true); } - verify(shuffle, atLeast(1)).reportException(any(Throwable.class)); + verify(shuffle, atLeast(1)).reportException(any()); } private ShuffleSchedulerForTest createScheduler(long startTime, int @@ -799,7 +791,8 @@ public void testPenalty() throws IOException, InterruptedException { MapHost mapHost = scheduler.pendingHosts.iterator().next(); //Fails to pull from host0. host0 should be added to penalties - scheduler.copyFailed(inputAttemptIdentifier, mapHost, false, true, false); + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(inputAttemptIdentifier), mapHost, + false, true); //Should not get host, as it is added to penalty loop MapHost host = scheduler.getHost(); @@ -812,6 +805,28 @@ public void testPenalty() throws IOException, InterruptedException { assertFalse("Host identifier mismatch", (host.getHost() + ":" + host.getPort() + ":" + host.getPartitionId()).equalsIgnoreCase("host0:10000")); } + @Test (timeout = 20000) + public void testProgressDuringGetHostWait() throws IOException, InterruptedException { + long startTime = System.currentTimeMillis(); + Configuration conf = new TezConfiguration(); + Shuffle shuffle = mock(Shuffle.class); + final ShuffleSchedulerForTest scheduler = createScheduler(startTime, 1, shuffle, conf); + Thread schedulerGetHostThread = new Thread(new Runnable() { + @Override + public void run() { + try { + scheduler.getHost(); + } catch (Exception e) { + e.printStackTrace(); + } + } + }); + schedulerGetHostThread.start(); + Thread.sleep(1000 * 3 + 1000); + schedulerGetHostThread.interrupt(); + verify(scheduler.inputContext, atLeast(3)).notifyProgress(); + } + @Test(timeout = 5000) public void testShutdown() throws Exception { InputContext inputContext = createTezInputContext(); @@ -931,6 +946,55 @@ public Void call() throws Exception { } } + @Test (timeout = 120000) + public void testPenalties() throws Exception { + InputContext inputContext = createTezInputContext(); + Configuration conf = new TezConfiguration(); + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_HOST_PENALTY_TIME_LIMIT_MS, 20000); + int numInputs = 10; + Shuffle shuffle =
mock(Shuffle.class); + MergeManager mergeManager = mock(MergeManager.class); + + final ShuffleSchedulerForTest scheduler = + new ShuffleSchedulerForTest(inputContext, conf, numInputs, shuffle, mergeManager, + mergeManager, + System.currentTimeMillis(), null, false, 0, "srcName"); + + ExecutorService executor = Executors.newFixedThreadPool(1); + + Future executorFuture = executor.submit(new Callable() { + @Override + public Void call() throws Exception { + scheduler.start(); + return null; + } + }); + + InputAttemptIdentifier[] identifiers = new InputAttemptIdentifier[numInputs]; + + for (int i = 0; i < numInputs; i++) { + CompositeInputAttemptIdentifier inputAttemptIdentifier = + new CompositeInputAttemptIdentifier(i, 0, "attempt_", 1); + scheduler.addKnownMapOutput("host" + i, 10000, 1, inputAttemptIdentifier); + identifiers[i] = inputAttemptIdentifier; + } + + MapHost[] mapHosts = new MapHost[numInputs]; + int count = 0; + for (MapHost mh : scheduler.mapLocations.values()) { + mapHosts[count++] = mh; + } + + for (int i = 0; i < 10; i++) { + scheduler.copyFailed(InputAttemptFetchFailure.fromAttempt(identifiers[0]), mapHosts[0], false, + false); + } + ShuffleScheduler.Penalty[] penaltyArray = new ShuffleScheduler.Penalty[scheduler.getPenalties().size()]; + scheduler.getPenalties().toArray(penaltyArray); + for (int i = 0; i < penaltyArray.length; i++) { + Assert.assertTrue(penaltyArray[i].getDelay(TimeUnit.MILLISECONDS) <= 20000); + } + } private InputContext createTezInputContext() throws IOException { ApplicationId applicationId = ApplicationId.newInstance(1, 1); @@ -952,8 +1016,8 @@ private InputContext createTezInputContext() throws IOException { @Override public ExecutorService answer(InvocationOnMock invocation) throws Throwable { return sharedExecutor.createExecutorService( - invocation.getArgumentAt(0, Integer.class), - invocation.getArgumentAt(1, String.class)); + invocation.getArgument(0, Integer.class), + invocation.getArgument(1, String.class)); } }); return inputContext; @@ -961,9 +1025,10 @@ public ExecutorService answer(InvocationOnMock invocation) throws Throwable { private static class ShuffleSchedulerForTest extends ShuffleScheduler { - private final AtomicInteger numFetchersCreated = new AtomicInteger(0); + private CountDownLatch latch = new CountDownLatch(10); private final boolean fetcherShouldWait; private final ExceptionReporter reporter; + private final InputContext inputContext; public ShuffleSchedulerForTest(InputContext inputContext, Configuration conf, int numberOfInputs, @@ -989,11 +1054,12 @@ public ShuffleSchedulerForTest(InputContext inputContext, Configuration conf, ifileReadAhead, ifileReadAheadLength, srcNameTrimmed); this.fetcherShouldWait = fetcherShouldWait; this.reporter = shuffle; + this.inputContext = inputContext; } @Override FetcherOrderedGrouped constructFetcherForHost(MapHost mapHost) { - numFetchersCreated.incrementAndGet(); + latch.countDown(); FetcherOrderedGrouped mockFetcher = mock(FetcherOrderedGrouped.class); doAnswer(new Answer() { @Override diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestIFile.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestIFile.java index f06fda3873..960aee345a 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestIFile.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestIFile.java @@ -18,6 +18,13 @@ package 
org.apache.tez.runtime.library.common.sort.impl; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -29,9 +36,7 @@ import java.util.List; import java.util.Random; -import org.junit.Assert; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSDataInputStream; @@ -39,27 +44,37 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BoundedByteArrayOutputStream; -import org.apache.tez.runtime.library.utils.BufferUtils; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.hadoop.io.compress.lz4.Lz4Compressor; import org.apache.hadoop.io.serializer.Deserializer; import org.apache.hadoop.io.serializer.SerializationFactory; +import org.apache.hadoop.io.serializer.WritableSerialization; +import org.apache.hadoop.util.NativeCodeLoader; +import org.apache.tez.common.TezRuntimeFrameworkConfigs; import org.apache.tez.runtime.library.common.InputAttemptIdentifier; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter; import org.apache.tez.runtime.library.common.sort.impl.IFile.Reader; import org.apache.tez.runtime.library.common.sort.impl.IFile.Writer; +import org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles; import org.apache.tez.runtime.library.testutils.KVDataGen; import org.apache.tez.runtime.library.testutils.KVDataGen.KVPair; +import org.apache.tez.runtime.library.utils.BufferUtils; +import org.apache.tez.runtime.library.utils.CodecUtils; import org.junit.After; +import org.junit.Assert; +import org.junit.Assume; import org.junit.Before; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import static org.junit.Assert.*; +import com.google.protobuf.ByteString; public class TestIFile { @@ -83,6 +98,7 @@ public class TestIFile { new Path(System.getProperty("test.build.data", "/tmp")), TestIFile.class.getName()) .makeQualified(localFs.getUri(), localFs.getWorkingDirectory()); LOG.info("Using workDir: " + workDir); + defaultConf.set(TezRuntimeFrameworkConfigs.LOCAL_DIRS, workDir.toString()); } catch (IOException e) { throw new RuntimeException(e); } @@ -132,7 +148,7 @@ public void testCompressedFlag() throws IOException { public void testWritingEmptyKeyValues() throws IOException { DataInputBuffer key = new DataInputBuffer(); DataInputBuffer value = new DataInputBuffer(); - IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, null, null, null, + IFile.Writer writer = new IFile.Writer(null, null, localFs, outputPath, null, null, null, null, null); writer.append(key, value); writer.append(key, value); @@ -191,7 +207,7 @@ public void testExceedMaxSize() throws IOException { // Check Key length exceeding MAX_BUFFER_SIZE out = 
localFs.create(outputPath); - writer = new IFile.Writer(defaultConf, out, + writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), out, Text.class, Text.class, null, null, null, false); writer.append(longString, shortString); writer.close(); @@ -214,7 +230,7 @@ public void testExceedMaxSize() throws IOException { // Check Value length exceeding MAX_BUFFER_SIZE out = localFs.create(outputPath); - writer = new IFile.Writer(defaultConf, out, + writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), out, Text.class, Text.class, null, null, null, false); writer.append(shortString, longString); writer.close(); @@ -238,7 +254,7 @@ public void testExceedMaxSize() throws IOException { // Check Key length not getting doubled out = localFs.create(outputPath); - writer = new IFile.Writer(defaultConf, out, + writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), out, Text.class, Text.class, null, null, null, false); writer.append(longString, shortString); writer.close(); @@ -257,7 +273,7 @@ public void testExceedMaxSize() throws IOException { // Check Value length not getting doubled out = localFs.create(outputPath); - writer = new IFile.Writer(defaultConf, out, + writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), out, Text.class, Text.class, null, null, null, false); writer.append(shortString, longString); writer.close(); @@ -284,7 +300,7 @@ public void testExceedMaxSize() throws IOException { public void testWithRLEMarker() throws IOException { //Test with append(Object, Object) FSDataOutputStream out = localFs.create(outputPath); - IFile.Writer writer = new IFile.Writer(defaultConf, out, + IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), out, Text.class, IntWritable.class, codec, null, null, true); Text key = new Text("key0"); @@ -310,9 +326,12 @@ public void testWithRLEMarker() throws IOException { int valueLength = 6; int pos = 0; out = localFs.create(outputPath); - writer = new IFile.Writer(defaultConf, out, + writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), out, Text.class, IntWritable.class, codec, null, null, true); + BoundedByteArrayOutputStream boundedOut = new BoundedByteArrayOutputStream(1024*1024); + Writer inMemWriter = new InMemoryWriter(boundedOut, true); + DataInputBuffer kin = new DataInputBuffer(); kin.reset(kvbuffer, pos, keyLength); @@ -324,6 +343,8 @@ public void testWithRLEMarker() throws IOException { //Write initial KV pair writer.append(kin, vin); assertFalse(writer.sameKey); + inMemWriter.append(kin, vin); + assertFalse(inMemWriter.sameKey); pos += (keyLength + valueLength); //Second key is similar to key1 (RLE should kick in) @@ -332,6 +353,8 @@ public void testWithRLEMarker() throws IOException { vin.reset(vout.getData(), vout.getLength()); writer.append(kin, vin); assertTrue(writer.sameKey); + inMemWriter.append(kin, vin); + assertTrue(inMemWriter.sameKey); pos += (keyLength + valueLength); //Next key (key3) is different (RLE should not kick in) @@ -340,9 +363,13 @@ public void testWithRLEMarker() throws IOException { vin.reset(vout.getData(), vout.getLength()); writer.append(kin, vin); assertFalse(writer.sameKey); + inMemWriter.append(kin, vin); + assertFalse(inMemWriter.sameKey); writer.close(); out.close(); + inMemWriter.close(); + boundedOut.close(); } @Test(timeout = 5000) @@ -416,25 +443,25 @@ public void testInMemoryWriter() throws IOException { //No RLE, No 
RepeatKeys, no compression writer = new InMemoryWriter(bout); - writeTestFileUsingDataBuffer(writer, false, false, data, null); + writeTestFileUsingDataBuffer(writer, false, data); readUsingInMemoryReader(bout.getBuffer(), data); //No RLE, RepeatKeys, no compression bout.reset(); writer = new InMemoryWriter(bout); - writeTestFileUsingDataBuffer(writer, false, true, data, null); + writeTestFileUsingDataBuffer(writer, true, data); readUsingInMemoryReader(bout.getBuffer(), data); //RLE, No RepeatKeys, no compression bout.reset(); - writer = new InMemoryWriter(bout); - writeTestFileUsingDataBuffer(writer, true, false, data, null); + writer = new InMemoryWriter(bout, true); + writeTestFileUsingDataBuffer(writer, false, data); readUsingInMemoryReader(bout.getBuffer(), data); //RLE, RepeatKeys, no compression bout.reset(); - writer = new InMemoryWriter(bout); - writeTestFileUsingDataBuffer(writer, true, true, data, null); + writer = new InMemoryWriter(bout, true); + writeTestFileUsingDataBuffer(writer, true, data); readUsingInMemoryReader(bout.getBuffer(), data); } @@ -442,8 +469,8 @@ public void testInMemoryWriter() throws IOException { //Test appendValue feature public void testAppendValue() throws IOException { List data = KVDataGen.generateTestData(false, rnd.nextInt(100)); - IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, - Text.class, IntWritable.class, codec, null, null); + IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), + localFs, outputPath, Text.class, IntWritable.class, codec, null, null); Text previousKey = null; for (KVPair kvp : data) { @@ -473,8 +500,8 @@ public void testAppendValues() throws IOException { values.add(val); } - IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, - Text.class, IntWritable.class, codec, null, null); + IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), + localFs, outputPath, Text.class, IntWritable.class, codec, null, null); writer.append(data.get(0).getKey(), data.get(0).getvalue()); //write first KV pair writer.appendValues(values.subList(1, values.size()).iterator()); //add the rest here @@ -488,6 +515,130 @@ public void testAppendValues() throws IOException { readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec); } + @Test(timeout = 5000) + // Basic test + public void testFileBackedInMemIFileWriter() throws IOException { + List data = new ArrayList<>(); + List values = new ArrayList<>(); + Text key = new Text("key"); + IntWritable val = new IntWritable(1); + for(int i = 0; i < 5; i++) { + data.add(new KVPair(key, val)); + values.add(val); + } + + TezTaskOutputFiles tezTaskOutput = new TezTaskOutputFiles(defaultConf, "uniqueId", 1); + IFile.FileBackedInMemIFileWriter writer = new IFile.FileBackedInMemIFileWriter( + new WritableSerialization(), new WritableSerialization(), localFs, tezTaskOutput, + Text.class, IntWritable.class, codec, null, null, + 200); + + writer.appendKeyValues(data.get(0).getKey(), values.iterator()); + Text lastKey = new Text("key3"); + IntWritable lastVal = new IntWritable(10); + data.add(new KVPair(lastKey, lastVal)); + writer.append(lastKey, lastVal); + writer.close(); + + byte[] bytes = new byte[(int) writer.getRawLength()]; + IFile.Reader.readToMemory(bytes, + new ByteArrayInputStream(ByteString.copyFrom(writer.getData()).toByteArray()), + (int) writer.getCompressedLength(), codec, false, -1); + readUsingInMemoryReader(bytes, data); + } + + @Test(timeout 
+ @Test(timeout = 5000) + // Basic test + public void testFileBackedInMemIFileWriterWithSmallBuffer() throws IOException { + List<KVPair> data = new ArrayList<>(); + TezTaskOutputFiles tezTaskOutput = new TezTaskOutputFiles(defaultConf, "uniqueId", 1); + IFile.FileBackedInMemIFileWriter writer = new IFile.FileBackedInMemIFileWriter( + new WritableSerialization(), new WritableSerialization(), localFs, tezTaskOutput, + Text.class, IntWritable.class, codec, null, null, + 2); + + // empty ifile + writer.close(); + + // Buffer should have self adjusted. So for this empty file, it shouldn't + // hit disk. + assertFalse("Data should not have been flushed to disk", writer.isDataFlushedToDisk()); + + byte[] bytes = new byte[(int) writer.getRawLength()]; + IFile.Reader.readToMemory(bytes, + new ByteArrayInputStream(ByteString.copyFrom(writer.getData()).toByteArray()), + (int) writer.getCompressedLength(), codec, false, -1); + + readUsingInMemoryReader(bytes, data); + } + + @Test(timeout = 20000) + // Test file spill over scenario + public void testFileBackedInMemIFileWriter_withSpill() throws IOException { + List<KVPair> data = new ArrayList<>(); + List<IntWritable> values = new ArrayList<>(); + + Text key = new Text("key"); + IntWritable val = new IntWritable(1); + for(int i = 0; i < 5; i++) { + data.add(new KVPair(key, val)); + values.add(val); + } + + // Setting cache limit to 20. Actual data would be around 43 bytes, so it would spill over. + TezTaskOutputFiles tezTaskOutput = new TezTaskOutputFiles(defaultConf, "uniqueId", 1); + IFile.FileBackedInMemIFileWriter writer = new IFile.FileBackedInMemIFileWriter( + new WritableSerialization(), new WritableSerialization(), localFs, tezTaskOutput, + Text.class, IntWritable.class, codec, null, null, + 20); + writer.setOutputPath(outputPath); + + writer.appendKeyValues(data.get(0).getKey(), values.iterator()); + Text lastKey = new Text("key3"); + IntWritable lastVal = new IntWritable(10); + + data.add(new KVPair(lastKey, lastVal)); + writer.append(lastKey, lastVal); + writer.close(); + + assertTrue("Data should have been flushed to disk", writer.isDataFlushedToDisk()); + + // Read output content to memory + FSDataInputStream inStream = localFs.open(outputPath); + byte[] bytes = new byte[(int) writer.getRawLength()]; + + IFile.Reader.readToMemory(bytes, inStream, + (int) writer.getCompressedLength(), codec, false, -1); + inStream.close(); + + readUsingInMemoryReader(bytes, data); + } + + @Test(timeout = 5000) + // Test empty file case + public void testEmptyFileBackedInMemIFileWriter() throws IOException { + List<KVPair> data = new ArrayList<>(); + TezTaskOutputFiles + tezTaskOutput = new TezTaskOutputFiles(defaultConf, "uniqueId", 1); + + IFile.FileBackedInMemIFileWriter writer = new IFile.FileBackedInMemIFileWriter( + new WritableSerialization(), new WritableSerialization(), localFs, tezTaskOutput, + Text.class, IntWritable.class, codec, null, null, + 100); + + // empty ifile + writer.close(); + + byte[] bytes = new byte[(int) writer.getRawLength()]; + + IFile.Reader.readToMemory(bytes, + new ByteArrayInputStream(ByteString.copyFrom(writer.getData()).toByteArray()), + (int) writer.getCompressedLength(), codec, false, -1); + + readUsingInMemoryReader(bytes, data); + } + + @Test(timeout = 5000) //Test appendKeyValues feature public void testAppendKeyValues() throws IOException { @@ -501,8 +652,8 @@ public void testAppendKeyValues() throws IOException { values.add(val); } - IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, - Text.class, IntWritable.class, codec, null, null);
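+ // As elsewhere in this change, the writer now takes explicit key/value Serialization instances (WritableSerialization here) instead of deriving serializers from a Configuration.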
+ IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), + localFs, outputPath, Text.class, IntWritable.class, codec, null, null); writer.appendKeyValues(data.get(0).getKey(), values.iterator()); Text lastKey = new Text("key3"); @@ -519,8 +670,8 @@ public void testAppendKeyValues() throws IOException { //Test appendValue with DataInputBuffer public void testAppendValueWithDataInputBuffer() throws IOException { List<KVPair> data = KVDataGen.generateTestData(false, rnd.nextInt(100)); - IFile.Writer writer = new IFile.Writer(defaultConf, localFs, outputPath, - Text.class, IntWritable.class, codec, null, null); + IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), + localFs, outputPath, Text.class, IntWritable.class, codec, null, null); final DataInputBuffer previousKey = new DataInputBuffer(); DataInputBuffer key = new DataInputBuffer(); @@ -579,6 +730,76 @@ public void testReadToDisk() throws IOException { reader.close(); } + @Test + public void testInMemoryBufferSize() throws IOException { + Configurable configurableCodec = (Configurable) codec; + int originalCodecBufferSize = + configurableCodec.getConf().getInt(CodecUtils.getBufferSizeProperty(codec), -1); + + // for smaller amount of data, codec buffer should be sized according to compressed data length + List<KVPair> data = KVDataGen.generateTestData(false, rnd.nextInt(100)); + Writer writer = writeTestFile(false, false, data, codec); + readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec); + + Assert.assertEquals(originalCodecBufferSize, // original size is repaired + configurableCodec.getConf().getInt(CodecUtils.getBufferSizeProperty(codec), 0)); + + // buffer size cannot grow infinitely with compressed data size + data = KVDataGen.generateTestDataOfKeySize(false, 20000, rnd.nextInt(100)); + writer = writeTestFile(false, false, data, codec); + readAndVerifyData(writer.getRawLength(), writer.getCompressedLength(), data, codec); + + Assert.assertEquals(originalCodecBufferSize, // original size is repaired + configurableCodec.getConf().getInt(CodecUtils.getBufferSizeProperty(codec), 0)); + } + + @Test(expected = IllegalArgumentException.class) + public void testSmallDataCompression() throws IOException { + Assume.assumeTrue(NativeCodeLoader.isNativeCodeLoaded()); + + tryWriteFileWithBufferSize(17, "org.apache.hadoop.io.compress.Lz4Codec"); + tryWriteFileWithBufferSize(32, "org.apache.hadoop.io.compress.Lz4Codec"); + } + + private void tryWriteFileWithBufferSize(int bufferSize, String codecClassName) + throws IOException { + Configuration conf = new Configuration(); + + System.out.println("trying with buffer size: " + bufferSize); + conf.set(CodecUtils.getBufferSizeProperty(codecClassName), Integer.toString(bufferSize)); + CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf); + CompressionCodec codecToTest = + codecFactory.getCodecByClassName(codecClassName); + List<KVPair> data = KVDataGen.generateTestDataOfKeySize(false, 1, 0); + writeTestFile(false, false, data, codecToTest); + } + + @Test(expected = IllegalArgumentException.class) + public void testLz4CompressedDataIsLargerThanOriginal() throws IOException { + Assume.assumeTrue(NativeCodeLoader.isNativeCodeLoaded()); + + // this one succeeds + byte[] buf = new byte[32]; + initBufWithNumbers(buf, 24, 45, 55, 49, 54, 55, 55, 54, 49, 48, 50, 55, 49, 56, 54, 48, 57, 48); + Lz4Compressor comp = new Lz4Compressor(32, false); + comp.setInput(buf, 0, 32); + comp.compress(buf, 0, 32); + + // 
adding 1 more element makes that fail + buf = new byte[32]; + initBufWithNumbers(buf, 24, 45, 55, 49, 54, 55, 55, 54, 49, 48, 50, 55, 49, 56, 54, 48, 57, 48, + 50); + comp = new Lz4Compressor(32, false); + comp.setInput(buf, 0, 32); + comp.compress(buf, 0, 32); + } + + private void initBufWithNumbers(byte[] buf, int... args) { + for (int i = 0; i < args.length; i++) { + buf[i] = (byte) args[i]; + } + } + /** * Test different options (RLE, repeat keys, compression) on reader/writer * @@ -751,15 +972,15 @@ private void verifyData(Reader reader, List<KVPair> data) private Writer writeTestFile(boolean rle, boolean repeatKeys, List<KVPair> data, CompressionCodec codec) throws IOException { FSDataOutputStream out = localFs.create(outputPath); - IFile.Writer writer = new IFile.Writer(defaultConf, out, + IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), out, Text.class, IntWritable.class, codec, null, null, rle); - writeTestFile(writer, rle, repeatKeys, data, codec); + writeTestFile(writer, repeatKeys, data); out.close(); return writer; } - private Writer writeTestFile(IFile.Writer writer, boolean rle, boolean repeatKeys, - List<KVPair> data, CompressionCodec codec) throws IOException { + private Writer writeTestFile(IFile.Writer writer, boolean repeatKeys, + List<KVPair> data) throws IOException { assertNotNull(writer); Text previousKey = null; @@ -784,15 +1005,15 @@ private Writer writeTestFile(IFile.Writer writer, boolean rle, boolean repeatKey private Writer writeTestFileUsingDataBuffer(boolean rle, boolean repeatKeys, List<KVPair> data, CompressionCodec codec) throws IOException { FSDataOutputStream out = localFs.create(outputPath); - IFile.Writer writer = new IFile.Writer(defaultConf, out, + IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), out, Text.class, IntWritable.class, codec, null, null, rle); - writeTestFileUsingDataBuffer(writer, rle, repeatKeys, data, codec); + writeTestFileUsingDataBuffer(writer, repeatKeys, data); out.close(); return writer; } - private Writer writeTestFileUsingDataBuffer(IFile.Writer writer, boolean rle, boolean repeatKeys, - List<KVPair> data, CompressionCodec codec) throws IOException { + private Writer writeTestFileUsingDataBuffer(Writer writer, boolean repeatKeys, + List<KVPair> data) throws IOException { DataInputBuffer previousKey = new DataInputBuffer(); DataInputBuffer key = new DataInputBuffer(); DataInputBuffer value = new DataInputBuffer(); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java index f85272bcaa..3955676076 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java @@ -21,7 +21,9 @@ import com.google.common.collect.Maps; import org.apache.commons.lang.RandomStringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; @@ -39,10 +41,12 @@ import org.apache.tez.runtime.api.ExecutionContext; import org.apache.tez.runtime.api.OutputContext; import org.apache.tez.runtime.api.OutputStatisticsReporter; 
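+// TaskContext is needed by the DummyCombiner helper added below: Tez appears to construct combiners reflectively with a TaskContext constructor argument (via TezRuntimeUtils.instantiateCombiner), hence the new import.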
+import org.apache.tez.runtime.api.TaskContext; import org.apache.tez.runtime.api.impl.ExecutionContextImpl; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration.ReportPartitionStats; -import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; +import org.apache.tez.runtime.library.common.Constants; +import org.apache.tez.runtime.library.common.combine.Combiner; import org.apache.tez.runtime.library.conf.OrderedPartitionedKVOutputConfig.SorterImpl; import org.apache.tez.runtime.library.partitioner.HashPartitioner; import org.apache.tez.runtime.library.testutils.RandomTextGenerator; @@ -54,13 +58,15 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.List; import java.util.Map; +import java.util.Random; import java.util.TreeMap; import java.util.UUID; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.anyListOf; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; @@ -68,24 +74,27 @@ import static org.mockito.internal.verification.VerificationModeFactory.times; public class TestPipelinedSorter { + private static Configuration conf; private static FileSystem localFs = null; private static Path workDir = null; + private static LocalDirAllocator dirAllocator; private OutputContext outputContext; private int numOutputs; private long initialAvailableMem; //TODO: Need to make it nested structure so that multiple partition cases can be validated - private static TreeMap<String, String> sortedDataMap = Maps.newTreeMap(); + private static TreeMap<Text, Text> sortedDataMap = Maps.newTreeMap(); static { - Configuration conf = getConf(); + conf = getConf(); try { localFs = FileSystem.getLocal(conf); workDir = new Path( new Path(System.getProperty("test.build.data", "/tmp")), TestPipelinedSorter.class.getName()) .makeQualified(localFs.getUri(), localFs.getWorkingDirectory()); + dirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); } catch (IOException e) { throw new RuntimeException(e); } @@ -98,10 +107,11 @@ public static void cleanup() throws IOException { @Before public void setup() throws IOException { + conf = getConf(); ApplicationId appId = ApplicationId.newInstance(10000, 1); TezCounters counters = new TezCounters(); String uniqueId = UUID.randomUUID().toString(); - String auxiliaryService = getConf().get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, + String auxiliaryService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT); this.outputContext = createMockOutputContext(counters, appId, uniqueId, auxiliaryService); } @@ -109,6 +119,7 @@ public void setup() throws IOException { public static Configuration getConf() { Configuration conf = new Configuration(); conf.set("fs.defaultFS", "file:///"); + conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077"); //To enable PipelinedSorter conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_SORTER_CLASS, SorterImpl.PIPELINED.name()); @@ -137,15 +148,13 @@ public void basicTest() throws IOException { //TODO: need to support multiple partition testing later //# partition, # of keys, size per key, InitialMem, blockSize - Configuration conf = getConf(); conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 5); basicTest(1, 100000, 100, (10 * 1024l * 1024l), 3 << 20); - + 
verifyOutputPermissions(outputContext.getUniqueIdentifier()); } @Test public void testWithoutPartitionStats() throws IOException { - Configuration conf = getConf(); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_REPORT_PARTITION_STATS, false); //# partition, # of keys, size per key, InitialMem, blockSize basicTest(1, 0, 0, (10 * 1024l * 1024l), 3 << 20); @@ -154,7 +163,6 @@ public void testWithoutPartitionStats() throws IOException { @Test public void testWithEmptyData() throws IOException { - Configuration conf = getConf(); conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 5); //# partition, # of keys, size per key, InitialMem, blockSize basicTest(1, 0, 0, (10 * 1024l * 1024l), 3 << 20); @@ -164,7 +172,6 @@ public void testWithEmptyData() throws IOException { public void testEmptyDataWithPipelinedShuffle() throws IOException { this.numOutputs = 1; this.initialAvailableMem = 1 *1024 * 1024; - Configuration conf = getConf(); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false); conf.setInt(TezRuntimeConfiguration .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1); @@ -205,7 +212,6 @@ public void testEmptyPartitionsHelper(int numKeys, boolean sendEmptyPartitionDet int partitions = 50; this.numOutputs = partitions; this.initialAvailableMem = 1 *1024 * 1024; - Configuration conf = getConf(); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED, sendEmptyPartitionDetails); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true); conf.setInt(TezRuntimeConfiguration @@ -220,6 +226,7 @@ public void testEmptyPartitionsHelper(int numKeys, boolean sendEmptyPartitionDet assertTrue(sorter.getNumSpills() == numKeys + 1); } verifyCounters(sorter, outputContext); + verifyOutputPermissions(outputContext.getUniqueIdentifier()); Path indexFile = sorter.getFinalIndexFile(); TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf); for (int i = 0; i < partitions; i++) { @@ -262,7 +269,6 @@ public void testKVExceedsBuffer2() throws IOException { @Test public void testExceedsKVWithMultiplePartitions() throws IOException { - Configuration conf = getConf(); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true); this.numOutputs = 5; this.initialAvailableMem = 1 * 1024 * 1024; @@ -273,13 +279,13 @@ public void testExceedsKVWithMultiplePartitions() throws IOException { writeData(sorter, 100, 1<<20); verifyCounters(sorter, outputContext); + verifyOutputPermissions(outputContext.getUniqueIdentifier()); } @Test public void testExceedsKVWithPipelinedShuffle() throws IOException { this.numOutputs = 1; this.initialAvailableMem = 1 *1024 * 1024; - Configuration conf = getConf(); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false); conf.setInt(TezRuntimeConfiguration .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1); @@ -299,7 +305,6 @@ public void testExceedsKVWithPipelinedShuffle() throws IOException { public void test_TEZ_2602_50mb() throws IOException { this.numOutputs = 1; this.initialAvailableMem = 1 *1024 * 1024; - Configuration conf = getConf(); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true); conf.setInt(TezRuntimeConfiguration .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1); @@ -316,13 +321,13 @@ public void test_TEZ_2602_50mb() throws IOException { sorter.flush(); sorter.close(); + verifyOutputPermissions(outputContext.getUniqueIdentifier()); } //@Test public 
void testLargeDataWithMixedKV() throws IOException { this.numOutputs = 1; this.initialAvailableMem = 48 *1024 * 1024; - Configuration conf = getConf(); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true); PipelinedSorter sorter = new PipelinedSorter(this.outputContext, conf, numOutputs, initialAvailableMem); @@ -344,6 +349,7 @@ public void testLargeDataWithMixedKV() throws IOException { sorter.flush(); sorter.close(); + verifyOutputPermissions(outputContext.getUniqueIdentifier()); } @@ -380,7 +386,6 @@ public void testWithVariableKVLength2() throws IOException { @Test public void testWithCustomComparator() throws IOException { //Test with custom comparator - Configuration conf = getConf(); conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS, CustomComparator.class.getName()); basicTest(1, 100000, 100, (10 * 1024l * 1024l), 3 << 20); @@ -390,7 +395,6 @@ public void testWithCustomComparator() throws IOException { public void testWithPipelinedShuffle() throws IOException { this.numOutputs = 1; this.initialAvailableMem = 5 *1024 * 1024; - Configuration conf = getConf(); conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 5); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, false); conf.setInt(TezRuntimeConfiguration .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1); @@ -399,17 +403,19 @@ public void testWithPipelinedShuffle() throws IOException { initialAvailableMem); //Write 100 keys each of size 10 - writeData(sorter, 10000, 100); + writeData(sorter, 10000, 100, false); + sorter.flush(); + List<Event> events = sorter.close(); //final merge is disabled. Final output file would not be populated in this case. assertTrue(sorter.finalOutputFile == null); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true); - verify(outputContext, times(1)).sendEvents(anyListOf(Event.class)); + verify(outputContext, times(0)).sendEvents(any()); + assertTrue(events.size() > 0); } @Test public void testCountersWithMultiplePartitions() throws IOException { - Configuration conf = getConf(); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true); this.numOutputs = 5; this.initialAvailableMem = 5 * 1024 * 1024; @@ -420,11 +426,11 @@ public void testCountersWithMultiplePartitions() throws IOException { writeData(sorter, 10000, 100); verifyCounters(sorter, outputContext); + verifyOutputPermissions(outputContext.getUniqueIdentifier()); } @Test public void testMultipleSpills() throws IOException { - Configuration conf = getConf(); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true); this.numOutputs = 5; this.initialAvailableMem = 5 * 1024 * 1024; @@ -436,11 +442,48 @@ public void testMultipleSpills() throws IOException { writeData(sorter, 25000, 1000); assertFalse("Expecting needsRLE to be false", sorter.needsRLE()); verifyCounters(sorter, outputContext); + verifyOutputPermissions(outputContext.getUniqueIdentifier()); + } + + @Test + public void testWithCombiner() throws IOException { + conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true); + conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINER_CLASS, DummyCombiner.class.getName()); + this.numOutputs = 5; + this.initialAvailableMem = 5 * 1024 * 1024; + conf.setInt(TezRuntimeConfiguration + .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 3); + PipelinedSorter sorter = new PipelinedSorter(this.outputContext, conf, numOutputs, + initialAvailableMem); + + writeData(sorter, 1, 20); + + Path outputFile = 
sorter.finalOutputFile; + FileSystem fs = outputFile.getFileSystem(conf); + IFile.Reader reader = new IFile.Reader(fs, outputFile, null, null, null, false, -1, 4096); + verifyData(reader); + reader.close(); + + verifyCounters(sorter, outputContext); + verifyOutputPermissions(outputContext.getUniqueIdentifier()); + } + + // for testWithCombiner + public static class DummyCombiner implements Combiner { + public DummyCombiner(TaskContext ctx) { + // do nothing + } + + @Override + public void combine(TezRawKeyValueIterator rawIter, IFile.Writer writer) throws InterruptedException, IOException { + while (rawIter.next()) { + writer.append(rawIter.getKey(), rawIter.getValue()); + } + } } @Test public void testMultipleSpills_WithRLE() throws IOException { - Configuration conf = getConf(); conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true); this.numOutputs = 5; this.initialAvailableMem = 5 * 1024 * 1024; @@ -452,12 +495,39 @@ public void testMultipleSpills_WithRLE() throws IOException { writeSimilarKeys(sorter, 25000, 1000, true); assertTrue("Expecting needsRLE to be true", sorter.needsRLE()); verifyCounters(sorter, outputContext); + verifyOutputPermissions(outputContext.getUniqueIdentifier()); + } + + @Test + /** + * Verify whether all buffers are used evenly in sorter. + */ + public void basicTestForBufferUsage() throws IOException { + this.numOutputs = 1; + conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_LAZY_ALLOCATE_MEMORY, true); + + PipelinedSorter sorter = new PipelinedSorter(this.outputContext, conf, numOutputs, (100 << 20)); + Assert.assertTrue(sorter.maxNumberOfBlocks >= 2); + + // Start filling in with data 1MB Key, 1MB Val. + for (int i = 0; i < 200; i++) { + writeData(sorter, 1, 1024 * 1024, false); + } + + // Check if all buffers are evenly used + int avg = (int) sorter.bufferUsage.stream().mapToDouble(d -> d).average().orElse(0.0); + + for(int i = 0; i< sorter.bufferUsage.size(); i++) { + int usage = sorter.bufferUsage.get(i); + Assert.assertTrue("Buffer index " + i + " is not used correctly. 
" + + " usage: " + usage + ", avg: " + avg, usage >= avg); + } + conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_LAZY_ALLOCATE_MEMORY, false); } public void basicTest2(int partitions, int[] numkeys, int[] keysize, long initialAvailableMem, int blockSize) throws IOException { this.numOutputs = partitions; // single output - Configuration conf = getConf(); conf.setInt(TezRuntimeConfiguration .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 100); PipelinedSorter sorter = new PipelinedSorter(this.outputContext, conf, numOutputs, @@ -472,21 +542,21 @@ private void writeData2(ExternalSorter sorter, int counter = 0; for (int numkey : numKeys) { int curKeyLen = keyLen[counter]; + char[] buffer = new char[curKeyLen]; for (int i = 0; i < numkey; i++) { - Text key = new Text(RandomStringUtils.randomAlphanumeric(curKeyLen)); - Text value = new Text(RandomStringUtils.randomAlphanumeric(curKeyLen)); - sorter.write(key, value); + Text random = new Text(randomAlphanumeric(buffer)); + sorter.write(random, random); } counter++; } sorter.flush(); sorter.close(); + verifyOutputPermissions(outputContext.getUniqueIdentifier()); } public void basicTest(int partitions, int numKeys, int keySize, long initialAvailableMem, int minBlockSize) throws IOException { this.numOutputs = partitions; // single output - Configuration conf = getConf(); conf.setInt(TezRuntimeConfiguration .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, minBlockSize >> 20); PipelinedSorter sorter = new PipelinedSorter(this.outputContext, conf, numOutputs, @@ -504,6 +574,7 @@ public void basicTest(int partitions, int numKeys, int keySize, } verifyCounters(sorter, outputContext); + verifyOutputPermissions(outputContext.getUniqueIdentifier()); Path outputFile = sorter.finalOutputFile; FileSystem fs = outputFile.getFileSystem(conf); TezCounter finalOutputBytes = @@ -557,7 +628,6 @@ private void verifyCounters(PipelinedSorter sorter, OutputContext context) { //Its not possible to allocate > 2 GB in test environment. Carry out basic checks here. public void memTest() throws IOException { //Verify if > 2 GB can be set via config - Configuration conf = getConf(); conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 3076); long size = ExternalSorter.getInitialMemoryRequirement(conf, 4096 * 1024 * 1024l); Assert.assertTrue(size == (3076l << 20)); @@ -642,7 +712,6 @@ public void memTest() throws IOException { //Intentionally not having timeout public void test_without_lazyMemAllocation() throws IOException { this.numOutputs = 10; - Configuration conf = getConf(); //128 MB. Pre-allocate. Request for default block size. Get 1 buffer conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 128); @@ -683,7 +752,6 @@ public void test_without_lazyMemAllocation() throws IOException { //Intentionally not having timeout public void test_with_lazyMemAllocation() throws IOException { this.numOutputs = 10; - Configuration conf = getConf(); //128 MB. Do not pre-allocate. 
// Get 32 MB buffer first and the another buffer with 96 on filling up @@ -743,7 +811,6 @@ public void test_with_lazyMemAllocation() throws IOException { //Intentionally not having timeout public void testLazyAllocateMem() throws IOException { this.numOutputs = 10; - Configuration conf = getConf(); conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 128); conf.setBoolean(TezRuntimeConfiguration .TEZ_RUNTIME_PIPELINED_SORTER_LAZY_ALLOCATE_MEMORY, false); @@ -791,6 +858,43 @@ public void testWithLargeKeyValueWithMinBlockSize() throws IOException { basicTest(1, 5, (2 << 20), (48 * 1024l * 1024l), 16 << 20); } + @Test + public void testWithLargeRecordAndLowMemory() throws IOException { + this.numOutputs = 1; + this.initialAvailableMem = 1 * 1024 * 1024; + conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true); + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1); + PipelinedSorter sorter = new PipelinedSorter(this.outputContext, conf, numOutputs, initialAvailableMem); + + // Set the record size to exceed 2k to trigger bug described in TEZ-4542. + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < 3072; i++) { + builder.append("1"); + } + Text value = new Text(builder.toString()); + long size = 50 * 1024 * 1024; + while (size > 0) { + Text key = RandomTextGenerator.generateSentence(); + sorter.write(key, value); + size -= key.getLength(); + } + + sorter.flush(); + sorter.close(); + verifyOutputPermissions(outputContext.getUniqueIdentifier()); + } + + private void verifyOutputPermissions(String spillId) throws IOException { + String subpath = Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR + "/" + spillId + + "/" + Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING; + Path outputPath = dirAllocator.getLocalPathToRead(subpath, conf); + Path indexPath = dirAllocator.getLocalPathToRead(subpath + Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING, conf); + Assert.assertEquals("Incorrect output permissions", (short)0640, + localFs.getFileStatus(outputPath).getPermission().toShort()); + Assert.assertEquals("Incorrect index permissions", (short)0640, + localFs.getFileStatus(indexPath).getPermission().toShort()); + } + private void writeData(ExternalSorter sorter, int numKeys, int keyLen) throws IOException { writeData(sorter, numKeys, keyLen, true); } @@ -799,29 +903,41 @@ private void writeData(ExternalSorter sorter, int numKeys, int keyLen) throws IO private void writeSimilarKeys(ExternalSorter sorter, int numKeys, int keyLen, boolean autoClose) throws IOException { sortedDataMap.clear(); - String keyStr = RandomStringUtils.randomAlphanumeric(keyLen); + char[] buffer = new char[keyLen]; + String keyStr = randomAlphanumeric(buffer); for (int i = 0; i < numKeys; i++) { if (i % 4 == 0) { - keyStr = RandomStringUtils.randomAlphanumeric(keyLen); + keyStr = randomAlphanumeric(buffer); } Text key = new Text(keyStr); Text value = new Text(RandomStringUtils.randomAlphanumeric(keyLen)); sorter.write(key, value); - sortedDataMap.put(key.toString(), value.toString()); //for verifying data later + sortedDataMap.put(key, value); //for verifying data later } if (autoClose) { closeSorter(sorter); } } + static private final Random RANDOM = new Random(); + int start = ' '; + int end = 'z' + 1; + int gap = end - start; + private String randomAlphanumeric(char[] buffer) { + for (int i = 0; i < buffer.length; ++i) { + buffer[i] = (char)(RANDOM.nextInt(gap) + start); + } + return new String(buffer); + } private void 
writeData(ExternalSorter sorter, int numKeys, int keyLen, boolean autoClose) throws IOException { + char[] buffer = new char[keyLen]; sortedDataMap.clear(); for (int i = 0; i < numKeys; i++) { - Text key = new Text(RandomStringUtils.randomAlphanumeric(keyLen)); - Text value = new Text(RandomStringUtils.randomAlphanumeric(keyLen)); - sorter.write(key, value); - sortedDataMap.put(key.toString(), value.toString()); //for verifying data later + String randomStr = randomAlphanumeric(buffer); + Text random = new Text(randomStr); + sorter.write(random, random); + sortedDataMap.put(random, random); //for verifying data later } if (autoClose) { closeSorter(sorter); @@ -841,7 +957,6 @@ private void verifyData(IFile.Reader reader) Text readValue = new Text(); DataInputBuffer keyIn = new DataInputBuffer(); DataInputBuffer valIn = new DataInputBuffer(); - Configuration conf = getConf(); SerializationFactory serializationFactory = new SerializationFactory(conf); Deserializer<Text> keyDeserializer = serializationFactory.getDeserializer(Text.class); Deserializer<Text> valDeserializer = serializationFactory.getDeserializer(Text.class); @@ -850,15 +965,15 @@ private void verifyData(IFile.Reader reader) int numRecordsRead = 0; - for (Map.Entry<String, String> entry : sortedDataMap.entrySet()) { - String key = entry.getKey(); - String val = entry.getValue(); + for (Map.Entry<Text, Text> entry : sortedDataMap.entrySet()) { + Text key = entry.getKey(); + Text val = entry.getValue(); if (reader.nextRawKey(keyIn)) { reader.nextRawValue(valIn); readKey = keyDeserializer.deserialize(readKey); readValue = valDeserializer.deserialize(readValue); - Assert.assertTrue(key.equalsIgnoreCase(readKey.toString())); - Assert.assertTrue(val.equalsIgnoreCase(readValue.toString())); + Assert.assertTrue(key.equals(readKey)); + Assert.assertTrue(val.equals(readValue)); numRecordsRead++; } } diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestTezMerger.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestTezMerger.java index b35c85f574..6512853c59 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestTezMerger.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestTezMerger.java @@ -18,7 +18,7 @@ package org.apache.tez.runtime.library.common.sort.impl; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.LinkedListMultimap; import com.google.common.collect.ListMultimap; import com.google.common.collect.Lists; @@ -36,11 +36,13 @@ import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.io.serializer.WritableSerialization; import org.apache.hadoop.util.Progress; import org.apache.hadoop.util.Progressable; import org.apache.tez.common.TezRuntimeFrameworkConfigs; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.common.ConfigUtils; +import org.apache.tez.runtime.library.common.serializer.SerializationContext; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryWriter; import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager; @@ -57,6 +59,7 @@ import java.util.Map; import java.util.Random; +import static org.junit.Assert.assertEquals; import static 
org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; @@ -64,24 +67,24 @@ public class TestTezMerger { private static final Logger LOG = LoggerFactory.getLogger(TestTezMerger.class); - private static Configuration defaultConf = new Configuration(); - private static FileSystem localFs = null; - private static Path workDir = null; - private static RawComparator comparator = null; - private static Random rnd = new Random(); + private static final Configuration DEFAULT_CONF = new Configuration(); + private static FileSystem localFs; + private static Path workDir; + private static RawComparator comparator; + private static final Random RND = new Random(); private static final String SAME_KEY = "SAME_KEY"; private static final String DIFF_KEY = "DIFF_KEY"; //store the generated data for final verification - private static ListMultimap<Integer, Long> verificationDataSet = LinkedListMultimap.create(); + private static final ListMultimap<Integer, Long> VERIFICATION_DATA_SET = LinkedListMultimap.create(); - private MergeManager merger = mock(MergeManager.class); + private final MergeManager merger = mock(MergeManager.class); static { - defaultConf.set("fs.defaultFS", "file:///"); + DEFAULT_CONF.set("fs.defaultFS", "file:///"); try { - localFs = FileSystem.getLocal(defaultConf); + localFs = FileSystem.getLocal(DEFAULT_CONF); workDir = new Path( new Path(System.getProperty("test.build.data", "/tmp")), TestTezMerger.class.getName()) .makeQualified(localFs.getUri(), localFs.getWorkingDirectory()); @@ -89,12 +92,12 @@ public class TestTezMerger { } catch (IOException e) { throw new RuntimeException(e); } - defaultConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName()); - defaultConf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, LongWritable.class.getName()); + DEFAULT_CONF.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName()); + DEFAULT_CONF.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, LongWritable.class.getName()); Path baseDir = new Path(workDir, TestMergeManager.class.getName()); String localDirs = baseDir.toString(); - defaultConf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDirs); - comparator = ConfigUtils.getIntermediateInputKeyComparator(defaultConf); + DEFAULT_CONF.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDirs); + comparator = ConfigUtils.getIntermediateInputKeyComparator(DEFAULT_CONF); } @AfterClass @@ -104,7 +107,7 @@ public static void cleanup() throws Exception { @Test(timeout = 80000) public void testMerge() throws Exception { - /** + /* * test with number of files, keys per file and mergefactor */ @@ -126,17 +129,16 @@ public void testMerge() throws Exception { merge(5, 1000, 100); //Create random mix of files (empty files + files with keys) - List<Path> pathList = new LinkedList<Path>(); - pathList.clear(); - pathList.addAll(createIFiles(Math.max(2, rnd.nextInt(20)), 0)); - pathList.addAll(createIFiles(Math.max(2, rnd.nextInt(20)), Math.max(2, rnd.nextInt(10)))); - merge(pathList, Math.max(2, rnd.nextInt(10))); + List<Path> pathList = new LinkedList<>(); + pathList.addAll(createIFiles(Math.max(2, RND.nextInt(20)), 0)); + pathList.addAll(createIFiles(Math.max(2, RND.nextInt(20)), Math.max(2, RND.nextInt(10)))); + merge(pathList, Math.max(2, RND.nextInt(10))); } private Path createIFileWithTextData(List<String> data) throws IOException { Path path = new Path(workDir + "/src", "data_" + System.nanoTime() + ".out"); FSDataOutputStream out = localFs.create(path); - IFile.Writer writer = new IFile.Writer(defaultConf, out, Text.class, + 
IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), out, Text.class, Text.class, null, null, null, true); for (String key : data) { writer.append(new Text(key), new Text(key + "_" + System.nanoTime())); @@ -148,10 +150,6 @@ private Path createIFileWithTextData(List<String> data) throws IOException { /** * Verify if the records are as per the expected data set - * - * @param records - * @param expectedResult - * @throws IOException */ private void verify(TezRawKeyValueIterator records, String[][] expectedResult) throws IOException { @@ -166,7 +164,7 @@ private void verify(TezRawKeyValueIterator records, String[][] expectedResult) Text v = new Text(); v.readFields(value); - assertTrue(k.toString().equals(expectedResult[i][0])); + assertEquals(k.toString(), expectedResult[i][0]); String correctResult = expectedResult[i][1]; @@ -184,7 +182,7 @@ private void verify(TezRawKeyValueIterator records, String[][] expectedResult) @Test(timeout = 5000) public void testWithCustomComparator_WithEmptyStrings() throws Exception { - List<Path> pathList = new LinkedList<Path>(); + List<Path> pathList = new LinkedList<>(); List<String> data = Lists.newLinkedList(); //Merge datasets with custom comparator RawComparator rc = new CustomComparator(); @@ -234,7 +232,7 @@ public void testWithCustomComparator_WithEmptyStrings() throws Exception { @Test(timeout = 5000) public void testWithCustomComparator_No_RLE() throws Exception { - List<Path> pathList = new LinkedList<Path>(); + List<Path> pathList = new LinkedList<>(); List<String> data = Lists.newLinkedList(); //Merge datasets with custom comparator RawComparator rc = new CustomComparator(); @@ -283,14 +281,13 @@ public void testWithCustomComparator_No_RLE() throws Exception { @Test(timeout = 5000) public void testWithCustomComparator_RLE_acrossFiles() throws Exception { - List<Path> pathList = new LinkedList<Path>(); + List<Path> pathList = new LinkedList<>(); List<String> data = Lists.newLinkedList(); LOG.info("Test with custom comparator with RLE spanning across segment boundaries"); //Test with 2 files, where the RLE keys can span across files //First file - data.clear(); data.add("0"); data.add("0"); pathList.add(createIFileWithTextData(data)); @@ -323,14 +320,13 @@ public void testWithCustomComparator_RLE_acrossFiles() throws Exception { @Test(timeout = 5000) public void testWithCustomComparator_mixedFiles() throws Exception { - List<Path> pathList = new LinkedList<Path>(); + List<Path> pathList = new LinkedList<>(); List<String> data = Lists.newLinkedList(); LOG.info("Test with custom comparator with mixed set of segments (empty, non-empty etc)"); //Test with 2 files, where the RLE keys can span across files //First file - data.clear(); data.add("0"); pathList.add(createIFileWithTextData(data)); @@ -372,7 +368,7 @@ public void testWithCustomComparator_mixedFiles() throws Exception { @Test(timeout = 5000) public void testWithCustomComparator_RLE() throws Exception { - List<Path> pathList = new LinkedList<Path>(); + List<Path> pathList = new LinkedList<>(); List<String> data = Lists.newLinkedList(); LOG.info("Test with custom comparator 2 files one containing RLE and also other segment " @@ -380,7 +376,6 @@ public void testWithCustomComparator_RLE() throws Exception { //Test with 2 files, same keys in middle of file //First file - data.clear(); data.add("1"); data.add("2"); data.add("2"); @@ -411,7 +406,7 @@ public void testWithCustomComparator_RLE() throws Exception { @Test(timeout = 5000) public void testWithCustomComparator_RLE2() throws Exception { - List<Path> pathList = new LinkedList<Path>(); + List<Path> pathList = new LinkedList<>(); List<String> data = 
Lists.newLinkedList(); LOG.info( @@ -419,7 +414,6 @@ public void testWithCustomComparator_RLE2() throws Exception { //Test with 3 files, same keys in middle of file //First file - data.clear(); data.add("0"); data.add("1"); data.add("1"); @@ -460,7 +454,7 @@ public void testWithCustomComparator_RLE2() throws Exception { @Test(timeout = 5000) public void testWithCustomComparator() throws Exception { - List<Path> pathList = new LinkedList<Path>(); + List<Path> pathList = new LinkedList<>(); List<String> data = Lists.newLinkedList(); LOG.info( @@ -468,7 +462,6 @@ public void testWithCustomComparator() throws Exception { //Test with 3 files //First file - data.clear(); data.add("0"); pathList.add(createIFileWithTextData(data)); @@ -498,14 +491,13 @@ public void testWithCustomComparator() throws Exception { @Test(timeout = 5000) public void testWithCustomComparator_RLE3() throws Exception { - List<Path> pathList = new LinkedList<Path>(); + List<Path> pathList = new LinkedList<>(); List<String> data = Lists.newLinkedList(); LOG.info("Test with custom comparator"); //Test with 3 files, same keys in middle of file //First file - data.clear(); data.add("0"); pathList.add(createIFileWithTextData(data)); @@ -533,7 +525,7 @@ public void testWithCustomComparator_RLE3() throws Exception { @Test(timeout = 5000) public void testWithCustomComparator_allEmptyFiles() throws Exception { - List<Path> pathList = new LinkedList<Path>(); + List<Path> pathList = new LinkedList<>(); List<String> data = Lists.newLinkedList(); LOG.info("Test with custom comparator where all files are empty"); @@ -559,20 +551,15 @@ /** * Merge the data sets - * - * @param pathList - * @param rc - * @return - * @throws IOException */ private TezRawKeyValueIterator merge(List<Path> pathList, RawComparator rc) throws IOException, InterruptedException { - TezMerger merger = new TezMerger(); - TezRawKeyValueIterator records = merger.merge(defaultConf, localFs, IntWritable.class, - LongWritable.class, null, false, 0, 1024, pathList.toArray(new Path[pathList.size()]), - true, 4, new Path(workDir, "tmp_" + System.nanoTime()), ((rc == null) ? comparator : rc), - new Reporter(), null, null, - null, new Progress()); + TezRawKeyValueIterator records = TezMerger.merge(DEFAULT_CONF, localFs, + new SerializationContext(IntWritable.class, LongWritable.class, new WritableSerialization(), + new WritableSerialization()), + null, false, 0, 1024, pathList.toArray(new Path[pathList.size()]), true, 4, + new Path(workDir, "tmp_" + System.nanoTime()), ((rc == null) ? 
comparator : rc), + new Reporter(), null, null, null, new Progress()); return records; } @@ -601,16 +588,15 @@ private void merge(int fileCount, int keysPerFile, int mergeFactor) throws Excep private void merge(List<Path> pathList, int mergeFactor, RawComparator rc) throws Exception { //Merge datasets - TezMerger merger = new TezMerger(); - TezRawKeyValueIterator records = merger.merge(defaultConf, localFs, IntWritable.class, - LongWritable.class, null, false, 0, 1024, pathList.toArray(new Path[pathList.size()]), - true, mergeFactor, new Path(workDir, "tmp_" + System.nanoTime()), - ((rc == null) ? comparator : rc), new Reporter(), null, null, - null, - new Progress()); + TezRawKeyValueIterator records = TezMerger.merge(DEFAULT_CONF, localFs, + new SerializationContext(IntWritable.class, LongWritable.class, new WritableSerialization(), + new WritableSerialization()), + null, false, 0, 1024, pathList.toArray(new Path[pathList.size()]), true, mergeFactor, + new Path(workDir, "tmp_" + System.nanoTime()), ((rc == null) ? comparator : rc), + new Reporter(), null, null, null, new Progress()); verifyData(records); - verificationDataSet.clear(); + VERIFICATION_DATA_SET.clear(); } private void verifyData(TezRawKeyValueIterator records) throws IOException { @@ -629,9 +615,9 @@ private void verifyData(TezRawKeyValueIterator records) throws IOException { if (records.isSameKey()) { LOG.info("\tSame Key : key=" + k.get() + ", val=" + v.get()); //More than one key should be present in the source data - assertTrue(verificationDataSet.get(k.get()).size() > 1); + assertTrue(VERIFICATION_DATA_SET.get(k.get()).size() > 1); //Ensure this is same as the previous key we saw - assertTrue("previousKey=" + pk + ", current=" + k.get(), pk == k.get()); + assertEquals("previousKey=" + pk + ", current=" + k.get(), pk, k.get()); } else { LOG.info("key=" + k.get() + ", val=" + v.get()); } @@ -642,21 +628,20 @@ } //Verify if the number of distinct entries is the same in source and the test - assertTrue("dataMap=" + dataMap.keySet().size() + ", verificationSet=" + - verificationDataSet.keySet().size(), - dataMap.keySet().size() == verificationDataSet.keySet().size()); + assertEquals("dataMap=" + dataMap.keySet().size() + ", verificationSet=" + + VERIFICATION_DATA_SET.keySet().size(), dataMap.keySet().size(), VERIFICATION_DATA_SET.keySet().size()); //Verify with source data - for (Integer key : verificationDataSet.keySet()) { - assertTrue("Data size for " + key + " not matching with source; dataSize:" + dataMap - .get(key).intValue() + ", source:" + verificationDataSet.get(key).size(), - dataMap.get(key).intValue() == verificationDataSet.get(key).size()); + for (Integer key : VERIFICATION_DATA_SET.keySet()) { + assertEquals("Data size for " + key + " not matching with source; dataSize:" + dataMap + .get(key) + ", source:" + VERIFICATION_DATA_SET.get(key).size(), + (int) dataMap.get(key), VERIFICATION_DATA_SET.get(key).size()); } //Verify if every key has the same number of repeated items in the source dataset as well for (Map.Entry<Integer, Integer> entry : dataMap.entrySet()) { - assertTrue(entry.getKey() + "", verificationDataSet.get(entry.getKey()).size() == entry - .getValue()); + assertEquals(entry.getKey() + "", VERIFICATION_DATA_SET.get(entry.getKey()).size(), (int) entry + .getValue()); } LOG.info("******************"); @@ -680,17 +665,17 @@ public void testMergeSegments() throws Exception { segments.addAll(createInMemorySegments(10, 100)); segments.addAll(createDiskSegments(10, 100)); mergeSegments(segments, 5, true); - verificationDataSet.clear(); + VERIFICATION_DATA_SET.clear(); segments.clear(); segments.addAll(createDiskSegments(10, 100)); mergeSegments(segments, 5, true); - verificationDataSet.clear(); + VERIFICATION_DATA_SET.clear(); segments.clear(); segments.addAll(createInMemorySegments(3, 100)); mergeSegments(segments, 5, false); - verificationDataSet.clear(); + VERIFICATION_DATA_SET.clear(); segments.clear(); } @@ -698,12 +683,13 @@ private void mergeSegments(List<TezMerger.Segment> segmentList, int mergeFactor, boolean hasDiskSegments) throws Exception {
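// A single merge pass folds up to mergeFactor segments; when disk segments are present the merged values are expected to flow through the on-disk IFile buffer, which the diskIFileValue length assertion below verifies.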
//Merge datasets - TezMerger.MergeQueue mergeQueue = new TezMerger.MergeQueue(defaultConf, localFs, segmentList, + TezMerger.MergeQueue mergeQueue = new TezMerger.MergeQueue(DEFAULT_CONF, localFs, segmentList, comparator, new Reporter(), false, false); - TezRawKeyValueIterator records = mergeQueue.merge(IntWritable.class, LongWritable.class, - mergeFactor, new Path(workDir, "tmp_" - + System.nanoTime()), null, null, null, null); + TezRawKeyValueIterator records = mergeQueue.merge( + new SerializationContext(IntWritable.class, LongWritable.class, new WritableSerialization(), + new WritableSerialization()), + mergeFactor, new Path(workDir, "tmp_" + System.nanoTime()), null, null, null, null); //Verify the merged data is correct verifyData(records); @@ -712,7 +698,7 @@ int diskBufLen = mergeQueue.diskIFileValue.getLength(); assertTrue(diskBufLen + " disk buf length should be > 0", (hasDiskSegments == diskBufLen > 0)); - verificationDataSet.clear(); + VERIFICATION_DATA_SET.clear(); } private List<TezMerger.Segment> createInMemorySegments(int segmentCount, int keysPerSegment) @@ -746,7 +732,7 @@ private void populateData(IntWritable intKey, LongWritable longVal, DataInputBuf longVal.write(v); key.reset(k.getData(), 0, k.getLength()); value.reset(v.getData(), 0, v.getLength()); - verificationDataSet.put(intKey.get(), longVal.get()); + VERIFICATION_DATA_SET.put(intKey.get(), longVal.get()); } private List<TezMerger.Segment> createDiskSegments(int segmentCount, int keysPerSegment) throws @@ -770,13 +756,13 @@ static Path writeIFile(int keysPerFile, int repeatCount) throws Path path = new Path(workDir + "/src", "data_" + System.nanoTime() + ".out"); FSDataOutputStream out = localFs.create(path); //create IFile with RLE - IFile.Writer writer = new IFile.Writer(defaultConf, out, IntWritable.class - , LongWritable.class, null, null, null, true); + IFile.Writer writer = new IFile.Writer(new WritableSerialization(), new WritableSerialization(), + out, IntWritable.class, LongWritable.class, null, null, null, true); for (Integer key : dataSet.keySet()) { for (Long value : dataSet.get(key)) { writer.append(new IntWritable(key), new LongWritable(value)); - verificationDataSet.put(key, value); + VERIFICATION_DATA_SET.put(key, value); } } writer.close(); @@ -789,7 +775,6 @@ static Path writeIFile(int keysPerFile, int repeatCount) throws * * @param keyCount approximate number of keys to be created * @param repeatCount number of times a key should be repeated - * @return */ static TreeMultimap<Integer, Long> createDataForIFile(int keyCount, int repeatCount) { TreeMultimap<Integer, Long> dataSet = TreeMultimap.create(); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/dflt/TestDefaultSorter.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/dflt/TestDefaultSorter.java index 444ebafa05..a56536dfe8 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/dflt/TestDefaultSorter.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/dflt/TestDefaultSorter.java @@ -18,12 +18,16 @@ package org.apache.tez.runtime.library.common.sort.impl.dflt; +import org.apache.hadoop.fs.LocalDirAllocator; +import org.apache.tez.runtime.library.api.Partitioner; +import org.apache.tez.runtime.library.common.Constants; +import org.apache.tez.runtime.library.common.TezRuntimeUtils; import org.junit.Assert; import static org.junit.Assert.assertEquals; 
import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyLong; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; @@ -42,12 +46,12 @@ import org.apache.commons.lang.RandomStringUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.util.StringInterner; import org.apache.tez.common.TezRuntimeFrameworkConfigs; import org.apache.tez.common.TezUtils; import org.apache.tez.common.counters.TaskCounter; @@ -59,7 +63,6 @@ import org.apache.tez.dag.api.UserPayload; import org.apache.tez.runtime.api.Event; import org.apache.tez.runtime.api.ExecutionContext; -import org.apache.tez.runtime.api.MemoryUpdateCallback; import org.apache.tez.runtime.api.OutputContext; import org.apache.tez.runtime.api.OutputStatisticsReporter; import org.apache.tez.runtime.api.events.CompositeDataMovementEvent; @@ -73,6 +76,7 @@ import org.apache.tez.runtime.library.conf.OrderedPartitionedKVOutputConfig.SorterImpl; import org.apache.tez.runtime.library.partitioner.HashPartitioner; import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads; +import org.apache.tez.util.StringInterner; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -84,16 +88,19 @@ public class TestDefaultSorter { - private Configuration conf; private static final int PORT = 80; private static final String UniqueID = "UUID"; private static FileSystem localFs = null; private static Path workingDir = null; + private Configuration conf; + private LocalDirAllocator dirAllocator; + @Before public void setup() throws IOException { conf = new Configuration(); + conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077"); conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_SORTER_CLASS, SorterImpl.LEGACY.name()); // DefaultSorter conf.set("fs.defaultFS", "file:///"); localFs = FileSystem.getLocal(conf); @@ -108,6 +115,7 @@ public void setup() throws IOException { conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, HashPartitioner.class.getName()); conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDirs); + dirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); } @AfterClass @@ -213,7 +221,7 @@ public void testSortLimitsWithLargeRecords() throws IOException { Text key = new Text(i + ""); //Generate random size between 1 MB to 100 MB. 
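// (nextInt(origin, bound) is origin-inclusive and bound-exclusive, so valSize lands in [1 MB, 100 MB))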
int valSize = ThreadLocalRandom.current().nextInt(1 * 1024 * 1024, 100 * 1024 * 1024); - String val = StringInterner.weakIntern(StringUtils.repeat("v", valSize)); + String val = StringInterner.intern(StringUtils.repeat("v", valSize)); sorter.write(key, new Text(val)); i = (i + 1) % 10; } @@ -262,25 +270,35 @@ public void basicTest() throws IOException { conf.setLong(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 1); context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler); - DefaultSorter sorter = new DefaultSorter(context, conf, 5, handler.getMemoryAssigned()); + SorterWrapper sorterWrapper = new SorterWrapper(context, conf, 5, handler.getMemoryAssigned()); + DefaultSorter sorter = sorterWrapper.getSorter(); //Write 1000 keys each of size 1000, (> 1 spill should happen) try { - writeData(sorter, 1000, 1000); + Text[] keys = generateData(1000, 1000); + Text[] values = generateData(1000, 1000); + for (int i = 0; i < keys.length; i++) { + sorterWrapper.writeKeyValue(keys[i], values[i]); + } + sorterWrapper.close(); assertTrue(sorter.getNumSpills() > 2); verifyCounters(sorter, context); } catch(IOException ioe) { fail(ioe.getMessage()); } + + verifyOutputPermissions(context.getUniqueIdentifier()); } @Test(timeout = 30000) public void testEmptyCaseFileLengths() throws IOException { - testEmptyCaseFileLengthsHelper(50, 2, 1, 48); - testEmptyCaseFileLengthsHelper(1, 1, 10, 0); + testEmptyCaseFileLengthsHelper(50, new String[] {"a", "b"}, new String[] {"1", "2"}); + testEmptyCaseFileLengthsHelper(50, new String[] {"a", "a"}, new String[] {"1", "2"}); + testEmptyCaseFileLengthsHelper(50, new String[] {"aaa", "bbb", "aaa"}, new String[] {"1", "2", "3"}); + testEmptyCaseFileLengthsHelper(1, new String[] {"abcdefghij"}, new String[] {"1234567890"}); } - public void testEmptyCaseFileLengthsHelper(int numPartitions, int numKeys, int keyLen, int expectedEmptyPartitions) + public void testEmptyCaseFileLengthsHelper(int numPartitions, String[] keys, String[] values) throws IOException { OutputContext context = createTezOutputContext(); @@ -289,39 +307,49 @@ context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler); String auxService = conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT); - DefaultSorter sorter = new DefaultSorter(context, conf, numPartitions, handler.getMemoryAssigned()); - try { - writeData(sorter, numKeys, keyLen); - List<Event> events = new ArrayList<Event>(); - String pathComponent = (context.getUniqueIdentifier() + "_" + 0); - ShuffleUtils.generateEventOnSpill(events, true, true, context, 0, - sorter.indexCacheList.get(0), 0, true, pathComponent, sorter.getPartitionStats(), - sorter.reportDetailedPartitionStats(), auxService, TezCommonUtils.newBestCompressionDeflater()); - - CompositeDataMovementEvent compositeDataMovementEvent = - (CompositeDataMovementEvent) events.get(1); - ByteBuffer bb = compositeDataMovementEvent.getUserPayload(); - ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload = - ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(bb)); - - if (shufflePayload.hasEmptyPartitions()) { - byte[] emptyPartitionsBytesString = - TezCommonUtils.decompressByteStringToByteArray( - shufflePayload.getEmptyPartitions()); - BitSet emptyPartitionBitSet = 
TezUtilsInternal.fromByteArray(emptyPartitionsBytesString); - Assert.assertTrue("Number of empty partitions did not match!", - emptyPartitionBitSet.cardinality() == expectedEmptyPartitions); - } else { - Assert.assertTrue(expectedEmptyPartitions == 0); - } - //4 bytes of header + numKeys* 2 *(keydata.length + keyLength.length) + 2 * 1 byte of EOF_MARKER + 4 bytes of checksum - assertEquals("Unexpected Output File Size!", - localFs.getFileStatus(sorter.getFinalOutputFile()).getLen(), numKeys * (4 + (2 * (2 + keyLen)) + 2 + 4)); - assertTrue(sorter.getNumSpills() == 1); - verifyCounters(sorter, context); - } catch(IOException ioe) { - fail(ioe.getMessage()); + SorterWrapper sorterWrapper = new SorterWrapper(context, conf, numPartitions, handler.getMemoryAssigned()); + DefaultSorter sorter = sorterWrapper.getSorter(); + assertEquals("Keys and values must have the same number of elements", keys.length, values.length); + BitSet keyRLEs = new BitSet(keys.length); + for (int i = 0; i < keys.length; i++) { + boolean isRLE = sorterWrapper.writeKeyValue(new Text(keys[i]), new Text(values[i])); + keyRLEs.set(i, isRLE); } + sorterWrapper.close(); + + List events = new ArrayList<>(); + String pathComponent = (context.getUniqueIdentifier() + "_" + 0); + ShuffleUtils.generateEventOnSpill(events, true, true, context, 0, + sorter.indexCacheList.get(0), 0, true, pathComponent, sorter.getPartitionStats(), + sorter.reportDetailedPartitionStats(), auxService, TezCommonUtils.newBestCompressionDeflater()); + + CompositeDataMovementEvent compositeDataMovementEvent = + (CompositeDataMovementEvent) events.get(1); + ByteBuffer bb = compositeDataMovementEvent.getUserPayload(); + ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload = + ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(bb)); + + if (shufflePayload.hasEmptyPartitions()) { + byte[] emptyPartitionsBytesString = + TezCommonUtils.decompressByteStringToByteArray( + shufflePayload.getEmptyPartitions()); + BitSet emptyPartitionBitSet = TezUtilsInternal.fromByteArray(emptyPartitionsBytesString); + Assert.assertEquals("Number of empty partitions did not match!", + sorterWrapper.getEmptyPartitionsCount(), emptyPartitionBitSet.cardinality()); + } else { + Assert.assertEquals(0, sorterWrapper.getEmptyPartitionsCount()); + } + // Each non-empty partition adds 4 bytes for header, 2 bytes for EOF_MARKER, 4 bytes for checksum + int expectedFileOutLength = sorterWrapper.getNonEmptyPartitionsCount() * 10; + for (int i = 0; i < keys.length; i++) { + // Each record adds 1 byte for key length, 1 byte Text overhead (length), key.length bytes for key + expectedFileOutLength += keys[i].length() + 2; + // Each record adds 1 byte for value length, 1 byte Text overhead (length), value.length bytes for value + expectedFileOutLength += values[i].length() + 2; + } + assertEquals("Unexpected Output File Size!", expectedFileOutLength, localFs.getFileStatus(sorter.getFinalOutputFile()).getLen()); + assertEquals(1, sorter.getNumSpills()); + verifyCounters(sorter, context); } @Test @@ -387,15 +415,22 @@ public void testEmptyPartitionsHelper(int numKeys, boolean sendEmptyPartitionDet context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler); int partitions = 50; - DefaultSorter sorter = new DefaultSorter(context, conf, partitions, handler.getMemoryAssigned()); + SorterWrapper sorterWrapper = new SorterWrapper(context, conf, partitions, handler.getMemoryAssigned()); + 
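// Sketch (not part of the patch): the expectedFileOutLength arithmetic above restated
// as a standalone helper plus a worked example. The helper name is made up, and it
// assumes exactly what the test assumes: every key/value length fits in one varint
// byte, and each non-empty partition costs 4 bytes of IFile header, 2 bytes of
// EOF_MARKER and 4 bytes of checksum (10 bytes in total).
final class IFileSizeSketch {
  static long expectedIFileLength(int nonEmptyPartitions, String[] keys, String[] values) {
    long len = nonEmptyPartitions * 10L;  // per-partition header + EOF marker + checksum
    for (int i = 0; i < keys.length; i++) {
      len += keys[i].length() + 2;        // key bytes + 1 varint length + 1 Text length byte
      len += values[i].length() + 2;      // value bytes + 1 varint length + 1 Text length byte
    }
    return len;
  }
  // Worked example: keys {"a","b"} with values {"1","2"} hashed to two distinct
  // partitions give 2 * 10 + (1 + 2) + (1 + 2) + (1 + 2) + (1 + 2) = 32 bytes.

  private IFileSizeSketch() {}
}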
DefaultSorter sorter = sorterWrapper.getSorter(); - writeData(sorter, numKeys, 1000000); + Text[] keys = generateData(numKeys, 1000000); + Text[] values = generateData(numKeys, 1000000); + for (int i = 0; i < keys.length; i++) { + sorterWrapper.writeKeyValue(keys[i], values[i]); + } + sorterWrapper.close(); if (numKeys == 0) { assertTrue(sorter.getNumSpills() == 1); } else { assertTrue(sorter.getNumSpills() == numKeys); } verifyCounters(sorter, context); + verifyOutputPermissions(context.getUniqueIdentifier()); if (sorter.indexCacheList.size() != 0) { for (int i = 0; i < sorter.getNumSpills(); i++) { TezSpillRecord record = sorter.indexCacheList.get(i); @@ -436,9 +471,15 @@ void testPartitionStats(boolean withStats) throws IOException { MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler(); context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler); - DefaultSorter sorter = new DefaultSorter(context, conf, 1, handler.getMemoryAssigned()); + SorterWrapper sorterWrapper = new SorterWrapper(context, conf, 1, handler.getMemoryAssigned()); + DefaultSorter sorter = sorterWrapper.getSorter(); - writeData(sorter, 1000, 10); + Text[] keys = generateData(1000, 10); + Text[] values = generateData(1000, 10); + for (int i = 0; i < keys.length; i++) { + sorterWrapper.writeKeyValue(keys[i], values[i]); + } + sorterWrapper.close(); assertTrue(sorter.getNumSpills() == 1); verifyCounters(sorter, context); @@ -469,9 +510,16 @@ public void testWithSingleSpillWithFinalMergeDisabled() throws IOException { MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler(); context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler); - DefaultSorter sorter = new DefaultSorter(context, conf, 1, handler.getMemoryAssigned()); - writeData(sorter, 1000, 10); + SorterWrapper sorterWrapper = new SorterWrapper(context, conf, 1, handler.getMemoryAssigned()); + DefaultSorter sorter = sorterWrapper.getSorter(); + + Text[] keys = generateData(1000, 10); + Text[] values = generateData(1000, 10); + for (int i = 0; i < keys.length; i++) { + sorterWrapper.writeKeyValue(keys[i], values[i]); + } + sorterWrapper.close(); assertTrue(sorter.getNumSpills() == 1); ArgumentCaptor eventCaptor = ArgumentCaptor.forClass(List.class); verify(context, times(1)).sendEvents(eventCaptor.capture()); @@ -482,6 +530,7 @@ public void testWithSingleSpillWithFinalMergeDisabled() throws IOException { ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload = ShuffleUserPayloads .DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload())); assertTrue(shufflePayload.getPathComponent().equalsIgnoreCase(UniqueID + "_0")); + verifyOutputPermissions(shufflePayload.getPathComponent()); } } @@ -499,9 +548,16 @@ public void testWithMultipleSpillsWithFinalMergeDisabled() throws IOException { MemoryUpdateCallbackHandler handler = new MemoryUpdateCallbackHandler(); context.requestInitialMemory(ExternalSorter.getInitialMemoryRequirement(conf, context.getTotalMemoryAvailableToTask()), handler); - DefaultSorter sorter = new DefaultSorter(context, conf, 1, handler.getMemoryAssigned()); + SorterWrapper sorterWrapper = new SorterWrapper(context, conf, 1, handler.getMemoryAssigned()); + DefaultSorter sorter = sorterWrapper.getSorter(); + + Text[] keys = generateData(10000, 1000); + Text[] values = generateData(10000, 1000); + for (int i = 0; i < keys.length; i++) { + 
sorterWrapper.writeKeyValue(keys[i], values[i]); + } + sorterWrapper.close(); - writeData(sorter, 10000, 1000); int spillCount = sorter.getNumSpills(); ArgumentCaptor eventCaptor = ArgumentCaptor.forClass(List.class); verify(context, times(1)).sendEvents(eventCaptor.capture()); @@ -513,6 +569,7 @@ public void testWithMultipleSpillsWithFinalMergeDisabled() throws IOException { ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload = ShuffleUserPayloads .DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(cdme.getUserPayload())); assertTrue(shufflePayload.getPathComponent().equalsIgnoreCase(UniqueID + "_" + spillIndex)); + verifyOutputPermissions(shufflePayload.getPathComponent()); spillIndex++; } } @@ -520,6 +577,17 @@ public void testWithMultipleSpillsWithFinalMergeDisabled() throws IOException { verifyCounters(sorter, context); } + private void verifyOutputPermissions(String spillId) throws IOException { + String subpath = Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR + "/" + spillId + + "/" + Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING; + Path outputPath = dirAllocator.getLocalPathToRead(subpath, conf); + Path indexPath = dirAllocator.getLocalPathToRead(subpath + Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING, conf); + Assert.assertEquals("Incorrect output permissions", (short)0640, + localFs.getFileStatus(outputPath).getPermission().toShort()); + Assert.assertEquals("Incorrect index permissions", (short)0640, + localFs.getFileStatus(indexPath).getPermission().toShort()); + } + private void verifyCounters(DefaultSorter sorter, OutputContext context) { TezCounter numShuffleChunks = context.getCounters().findCounter(TaskCounter.SHUFFLE_CHUNK_COUNT); TezCounter additionalSpills = context.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT); @@ -551,14 +619,57 @@ private void verifyCounters(DefaultSorter sorter, OutputContext context) { verify(context, atLeastOnce()).notifyProgress(); } - private void writeData(ExternalSorter sorter, int numKeys, int keyLen) throws IOException { - for (int i = 0; i < numKeys; i++) { - Text key = new Text(RandomStringUtils.randomAlphanumeric(keyLen)); - Text value = new Text(RandomStringUtils.randomAlphanumeric(keyLen)); + private static class SorterWrapper { + + private final DefaultSorter sorter; + private final Partitioner partitioner; + private final BitSet nonEmptyPartitions; + private final Object[] lastKeys; + private final int numPartitions; + + + public SorterWrapper(OutputContext context, Configuration conf, int numPartitions, long memoryAssigned) throws IOException { + sorter = new DefaultSorter(context, conf, numPartitions, memoryAssigned); + partitioner = TezRuntimeUtils.instantiatePartitioner(conf); + nonEmptyPartitions = new BitSet(numPartitions); + lastKeys = new Object[numPartitions]; + this.numPartitions = numPartitions; + } + + public boolean writeKeyValue(Object key, Object value) throws IOException { + int partition = partitioner.getPartition(key, value, this.numPartitions); + nonEmptyPartitions.set(partition); sorter.write(key, value); + + boolean isRLE = key.equals(lastKeys[partition]); + lastKeys[partition] = key; + return isRLE; + } + + public int getNonEmptyPartitionsCount() { + return nonEmptyPartitions.cardinality(); + } + + public int getEmptyPartitionsCount() { + return numPartitions - nonEmptyPartitions.cardinality(); + } + + public void close () throws IOException { + sorter.flush(); + sorter.close(); + } + + public DefaultSorter getSorter() { + return sorter; + } + } + + private static Text[] 
generateData(int numKeys, int keyLen) { + Text[] ret = new Text[numKeys]; + for (int i = 0; i < numKeys; i++) { + ret[i] = new Text(RandomStringUtils.randomAlphanumeric(keyLen)); } - sorter.flush(); - sorter.close(); + return ret; } private OutputContext createTezOutputContext() throws IOException { @@ -591,7 +702,7 @@ private OutputContext createTezOutputContext() throws IOException { callback.memoryAssigned(requestedSize); return null; } - }).when(context).requestInitialMemory(anyLong(), any(MemoryUpdateCallback.class)); + }).when(context).requestInitialMemory(anyLong(), any()); return context; } -} \ No newline at end of file +} diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/writers/TestUnorderedPartitionedKVWriter.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/writers/TestUnorderedPartitionedKVWriter.java index f1cea7ecc9..dc7357c9cc 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/writers/TestUnorderedPartitionedKVWriter.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/writers/TestUnorderedPartitionedKVWriter.java @@ -21,13 +21,13 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyListOf; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyList; import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.atMost; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; @@ -35,6 +35,7 @@ import java.io.ByteArrayInputStream; import java.io.DataInputStream; +import java.io.InputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; @@ -51,11 +52,16 @@ import java.util.regex.Pattern; import com.google.protobuf.ByteString; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.permission.FsAction; import org.apache.tez.dag.api.TezConfiguration; -import org.apache.tez.runtime.api.TaskFailureType; import org.apache.tez.runtime.api.events.VertexManagerEvent; +import org.apache.tez.runtime.library.common.Constants; +import org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.SpillInfo; +import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads; import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads.VertexManagerEventPayloadProto; import org.apache.tez.runtime.library.utils.DATA_RANGE_IN_MB; +import org.mockito.invocation.InvocationOnMock; import org.roaringbitmap.RoaringBitmap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -101,7 +107,6 @@ import com.google.common.collect.Multimap; import com.google.common.collect.Sets; import org.mockito.ArgumentCaptor; -import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @RunWith(value = Parameterized.class) @@ -261,7 +266,7 @@ public void testMultipleSpills() throws IOException, InterruptedException { @Test(timeout = 10000) public void testMultipleSpillsWithSmallBuffer() throws IOException, InterruptedException { // numBuffers is much higher than available threads. 
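// Sketch (not part of the patch): the import churn repeated across these test diffs
// comes from the Mockito upgrade. org.mockito.Matchers was deprecated in Mockito 2 and
// later removed in favour of org.mockito.ArgumentMatchers (re-exported through
// org.mockito.Mockito), and typed helpers such as anyListOf(Class) were dropped
// because the generic type is now inferred. The Sink interface below is a made-up
// stand-in used only to illustrate the new spelling.
import static org.mockito.Mockito.anyList;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

import java.util.Arrays;
import java.util.List;

class MockitoMigrationSketch {
  interface Sink { void send(List<String> events); }

  void example() {
    Sink sink = mock(Sink.class);
    sink.send(Arrays.asList("e1", "e2"));
    verify(sink).send(anyList());   // Mockito 1.x spelling: anyListOf(String.class)
  }
}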
- baseTest(200, 10, null, shouldCompress, 512, 0, 9600); + baseTest(200, 10, null, shouldCompress, 512, 0, 9600, false); } @Test(timeout = 10000) @@ -277,7 +282,9 @@ public void testNoRecords() throws IOException, InterruptedException { @Test(timeout = 10000) public void testNoRecords_SinglePartition() throws IOException, InterruptedException { // skipBuffers - baseTest(0, 1, null, shouldCompress, -1, 0); + baseTest(0, 1, null, shouldCompress, -1, 0, 2048, false); + // Check with data via events + baseTest(0, 1, null, shouldCompress, -1, 0, 2048, true); } @Test(timeout = 10000) @@ -290,6 +297,10 @@ public void testNoSpill_SinglePartition() throws IOException, InterruptedExcepti baseTest(10, 1, null, shouldCompress, -1, 0); } + @Test(timeout = 10000) + public void testSpill_SinglePartition() throws IOException, InterruptedException { + baseTest(1000, 1, null, shouldCompress, -1, 0, 2048, true); + } @Test(timeout = 10000) public void testRandomText() throws IOException, InterruptedException { @@ -406,7 +417,7 @@ public void textTest(int numRegularRecords, int numPartitions, long availableMem numRecordsWritten++; } if (pipeliningEnabled) { - verify(outputContext, times(numLargeKeys)).sendEvents(anyListOf(Event.class)); + verify(outputContext, times(numLargeKeys)).sendEvents(anyList()); } // Write Large val records @@ -422,7 +433,7 @@ public void textTest(int numRegularRecords, int numPartitions, long availableMem numRecordsWritten++; } if (pipeliningEnabled) { - verify(outputContext, times(numLargevalues + numLargeKeys)).sendEvents(anyListOf(Event.class)); + verify(outputContext, times(numLargevalues + numLargeKeys)).sendEvents(anyList()); } // Write records where key + val are large (but both can fit in the buffer individually) @@ -439,11 +450,11 @@ public void textTest(int numRegularRecords, int numPartitions, long availableMem } if (pipeliningEnabled) { verify(outputContext, times(numLargevalues + numLargeKeys + numLargeKvPairs)) - .sendEvents(anyListOf(Event.class)); + .sendEvents(anyList()); } List events = kvWriter.close(); - verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class)); + verify(outputContext, never()).reportFailure(any(), any(), any()); if (!pipeliningEnabled) { VertexManagerEvent vmEvent = null; @@ -485,6 +496,7 @@ public void textTest(int numRegularRecords, int numPartitions, long availableMem assertEquals(numPartitions, cdme.getCount()); DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom( ByteString.copyFrom(cdme.getUserPayload())); + assertFalse(eventProto.hasData()); BitSet emptyPartitionBits = null; if (partitionsWithData.cardinality() != numPartitions) { assertTrue(eventProto.hasEmptyPartitions()); @@ -509,6 +521,7 @@ public void textTest(int numRegularRecords, int numPartitions, long availableMem if (numRecordsWritten > 0) { assertTrue(localFs.exists(outputFilePath)); assertTrue(localFs.exists(spillFilePath)); + checkPermissions(outputFilePath, spillFilePath); } else { return; } @@ -718,8 +731,8 @@ private void baseTestWithPipelinedTransfer(int numRecords, int numPartitions, Se } verifyPartitionStats(VMEvent, partitionsWithData); - verify(outputContext, never()).reportFailure(any(TaskFailureType.class), - any(Throwable.class), any(String.class)); + verify(outputContext, never()).reportFailure(any(), + any(), any()); assertNull(kvWriter.currentBuffer); assertEquals(0, kvWriter.availableBuffers.size()); @@ -793,14 +806,28 @@ private void baseTestWithPipelinedTransfer(int 
numRecords, int numPartitions, Se if (numRecordsWritten > 0) { int numSpills = kvWriter.numSpills.get(); for (int i = 0; i < numSpills; i++) { - assertTrue(localFs.exists(taskOutput.getSpillFileForWrite(i, 10))); - assertTrue(localFs.exists(taskOutput.getSpillIndexFileForWrite(i, 10))); + Path outputFile = taskOutput.getSpillFileForWrite(i, 10); + Path indexFile = taskOutput.getSpillIndexFileForWrite(i, 10); + assertTrue(localFs.exists(outputFile)); + assertTrue(localFs.exists(indexFile)); + checkPermissions(outputFile, indexFile); } } else { return; } } + private void checkPermissions(Path outputFile, Path indexFile) throws IOException { + assertEquals("Incorrect output permissions (user)", FsAction.READ_WRITE, + localFs.getFileStatus(outputFile).getPermission().getUserAction()); + assertEquals("Incorrect output permissions (group)", FsAction.READ, + localFs.getFileStatus(outputFile).getPermission().getGroupAction()); + assertEquals("Incorrect index permissions (user)", FsAction.READ_WRITE, + localFs.getFileStatus(indexFile).getPermission().getUserAction()); + assertEquals("Incorrect index permissions (group)", FsAction.READ, + localFs.getFileStatus(indexFile).getPermission().getGroupAction()); + } + private void verifyEmptyPartitions(DataMovementEventPayloadProto eventProto, int numRecordsWritten, int numPartitions, Set skippedPartitions) throws IOException { @@ -948,8 +975,8 @@ private void baseTestWithFinalMergeDisabled(int numRecords, int numPartitions, } } - verify(outputContext, never()).reportFailure(any(TaskFailureType.class), - any(Throwable.class), any(String.class)); + verify(outputContext, never()).reportFailure(any(), + any(), any()); assertNull(kvWriter.currentBuffer); assertEquals(0, kvWriter.availableBuffers.size()); @@ -1041,6 +1068,10 @@ private void baseTestWithFinalMergeDisabled(int numRecords, int numPartitions, assertEquals(2, matcher.groupCount()); assertEquals(uniqueId, matcher.group(1)); assertTrue("spill id should be present in path component", matcher.group(2) != null); + Path outputPath = new Path(outputContext.getWorkDirs()[0], + "output/" + eventProto.getPathComponent() + "/" + Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING); + Path indexPath = outputPath.suffix(Constants.TEZ_RUNTIME_TASK_OUTPUT_INDEX_SUFFIX_STRING); + checkPermissions(outputPath, indexPath); } else { assertEquals(0, eventProto.getSpillId()); if (outputRecordsCounter.getValue() > 0) { @@ -1075,12 +1106,12 @@ private void baseTest(int numRecords, int numPartitions, Set skippedPar boolean shouldCompress, int maxSingleBufferSizeBytes, int bufferMergePercent) throws IOException, InterruptedException { baseTest(numRecords, numPartitions, skippedPartitions, shouldCompress, - maxSingleBufferSizeBytes, bufferMergePercent, 2048); + maxSingleBufferSizeBytes, bufferMergePercent, 2048, false); } private void baseTest(int numRecords, int numPartitions, Set skippedPartitions, boolean shouldCompress, int maxSingleBufferSizeBytes, int bufferMergePercent, int - availableMemory) + availableMemory, boolean dataViaEventEnabled) throws IOException, InterruptedException { PartitionerForTest partitioner = new PartitionerForTest(); ApplicationId appId = ApplicationId.newInstance(10000000, 1); @@ -1096,6 +1127,9 @@ private void baseTest(int numRecords, int numPartitions, Set skippedPar conf.setInt( TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_PARTITIONED_KVWRITER_BUFFER_MERGE_PERCENT, bufferMergePercent); + conf.setBoolean( + TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED, + 
dataViaEventEnabled); CompressionCodec codec = null; if (shouldCompress) { @@ -1137,12 +1171,22 @@ private void baseTest(int numRecords, int numPartitions, Set skippedPar if (numPartitions == 1) { assertEquals(true, kvWriter.skipBuffers); + + // VM & DME events + assertEquals(2, events.size()); + Event event1 = events.get(1); + assertTrue(event1 instanceof CompositeDataMovementEvent); + CompositeDataMovementEvent dme = (CompositeDataMovementEvent) event1; + ByteBuffer bb = dme.getUserPayload(); + ShuffleUserPayloads.DataMovementEventPayloadProto shufflePayload = + ShuffleUserPayloads.DataMovementEventPayloadProto.parseFrom(ByteString.copyFrom(bb)); + assertEquals(kvWriter.outputRecordsCounter.getValue(), shufflePayload.getNumRecord()); } int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead; int numExpectedSpills = numRecordsWritten / recordsPerBuffer / kvWriter.spillLimit; - verify(outputContext, never()).reportFailure(any(TaskFailureType.class), any(Throwable.class), any(String.class)); + verify(outputContext, never()).reportFailure(any(), any(), any()); assertNull(kvWriter.currentBuffer); assertEquals(0, kvWriter.availableBuffers.size()); @@ -1169,13 +1213,16 @@ private void baseTest(int numRecords, int numPartitions, Set skippedPar long fileOutputBytes = fileOutputBytesCounter.getValue(); if (numRecordsWritten > 0) { assertTrue(fileOutputBytes > 0); - if (!shouldCompress) { + if ((!shouldCompress) && (!dataViaEventEnabled)) { assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue()); } } else { assertEquals(0, fileOutputBytes); } - assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue()); + if (!dataViaEventEnabled) { + assertEquals(recordsPerBuffer * numExpectedSpills, + spilledRecordsCounter.getValue()); + } long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue(); long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue(); if (numExpectedSpills == 0) { @@ -1183,13 +1230,24 @@ private void baseTest(int numRecords, int numPartitions, Set skippedPar assertEquals(0, additionalSpillBytesRead); } else { assertTrue(additionalSpillBytesWritten > 0); - assertTrue(additionalSpillBytesRead > 0); - if (!shouldCompress) { - assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord)); - assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord)); + if (!dataViaEventEnabled) { + assertTrue(additionalSpillBytesRead > 0); + if (!shouldCompress) { + assertTrue(additionalSpillBytesWritten > + (recordsPerBuffer * numExpectedSpills * sizePerRecord)); + assertTrue(additionalSpillBytesRead > + (recordsPerBuffer * numExpectedSpills * sizePerRecord)); + } + } else { + if (kvWriter.writer.getCompressedLength() > + TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE_DEFAULT) { + assertTrue(additionalSpillBytesWritten > 0); + } } } - assertEquals(additionalSpillBytesWritten, additionalSpillBytesRead); + if (!dataViaEventEnabled) { + assertEquals(additionalSpillBytesWritten, additionalSpillBytesRead); + } // due to multiple threads, buffers could be merged in chunks in scheduleSpill. 
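// Sketch (not part of the patch), using only the configuration keys this diff already
// exercises: the dataViaEventEnabled branches above test the "transfer data via
// events" path, where a small single-partition output rides inside the
// DataMovementEvent payload (eventProto.getData()) instead of being fetched from the
// shuffle handler. The class below is an illustrative stand-in, not Tez code.
import org.apache.hadoop.conf.Configuration;
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration;

final class DataViaEventsConfSketch {
  static Configuration smallOutputConf() {
    Configuration conf = new Configuration();
    // Ship qualifying outputs inline in the event payload.
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED, true);
    // Outputs larger than this many bytes fall back to the regular on-disk shuffle
    // (512 mirrors the default the assertions elsewhere in this diff assume).
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE, 512);
    // Let consumers treat the inline payload as an in-memory IFile.
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_SUPPORT_IN_MEM_FILE, true);
    return conf;
  }

  private DataViaEventsConfSketch() {}
}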
assertTrue(numExpectedSpills >= numAdditionalSpillsCounter.getValue()); @@ -1238,29 +1296,61 @@ private void baseTest(int numRecords, int numPartitions, Set skippedPar Path outputFilePath = kvWriter.finalOutPath; Path spillFilePath = kvWriter.finalIndexPath; - if (numRecordsWritten > 0) { - assertTrue(localFs.exists(outputFilePath)); - assertTrue(localFs.exists(spillFilePath)); - } else { + if (numRecordsWritten <= 0) { return; } + boolean isInMem = eventProto.getData().hasData(); + assertTrue(localFs.exists(outputFilePath)); + assertEquals("Incorrect output permissions (user)", FsAction.READ_WRITE, + localFs.getFileStatus(outputFilePath).getPermission().getUserAction()); + assertEquals("Incorrect output permissions (group)", FsAction.READ, + localFs.getFileStatus(outputFilePath).getPermission().getGroupAction()); + if (!isInMem) { + assertTrue(localFs.exists(spillFilePath)); + assertEquals("Incorrect index permissions (user)", FsAction.READ_WRITE, + localFs.getFileStatus(spillFilePath).getPermission().getUserAction()); + assertEquals("Incorrect index permissions (group)", FsAction.READ, + localFs.getFileStatus(spillFilePath).getPermission().getGroupAction()); + + // verify no intermediate spill files have been left around + synchronized (kvWriter.spillInfoList) { + for (SpillInfo spill : kvWriter.spillInfoList) { + assertFalse("lingering intermediate spill file " + spill.outPath, + localFs.exists(spill.outPath)); + } + } + } + // Special case for 0 records. - TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf); DataInputBuffer keyBuffer = new DataInputBuffer(); DataInputBuffer valBuffer = new DataInputBuffer(); IntWritable keyDeser = new IntWritable(); LongWritable valDeser = new LongWritable(); for (int i = 0; i < numOutputs; i++) { - TezIndexRecord indexRecord = spillRecord.getIndex(i); - if (skippedPartitions != null && skippedPartitions.contains(i)) { - assertFalse("The Index Record for partition " + i + " should not have any data", indexRecord.hasData()); - continue; + IFile.Reader reader = null; + InputStream inStream; + if (isInMem) { + // Read from in-memory payload + int dataLoadSize = eventProto.getData().getData().size(); + inStream = new ByteArrayInputStream(eventProto.getData().getData().toByteArray()); + reader = new IFile.Reader(inStream, dataLoadSize, codec, null, + null, false, 0, -1); + } else { + TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf); + TezIndexRecord indexRecord = spillRecord.getIndex(i); + if (skippedPartitions != null && skippedPartitions.contains(i)) { + assertFalse("The Index Record for partition " + i + " should not have any data", indexRecord.hasData()); + continue; + } + + FSDataInputStream tmpStream = FileSystem.getLocal(conf).open(outputFilePath); + tmpStream.seek(indexRecord.getStartOffset()); + inStream = tmpStream; + reader = new IFile.Reader(tmpStream, indexRecord.getPartLength(), codec, null, + null, false, 0, -1); } - FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath); - inStream.seek(indexRecord.getStartOffset()); - IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, - null, false, 0, -1); + while (reader.nextRawKey(keyBuffer)) { reader.nextRawValue(valBuffer); keyDeser.readFields(keyBuffer); @@ -1332,6 +1422,7 @@ private Configuration createConfiguration(OutputContext outputContext, boolean shouldCompress, int maxSingleBufferSizeBytes, Class partitionerClass) { Configuration conf = new Configuration(false); + 
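// Sketch (not part of the patch): the isInMem branch of the verification loop above,
// pulled out into a standalone shape. When the event payload carries the bytes inline,
// an IFile.Reader reads directly from a ByteArrayInputStream over the protobuf data;
// no TezSpillRecord lookup or seek is needed. Constructor argument order follows the
// Reader calls in the test; the class and method names are made up for illustration.
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.tez.runtime.library.common.sort.impl.IFile;
import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads;

final class InlinePayloadReaderSketch {
  static void readInlinePayload(ShuffleUserPayloads.DataMovementEventPayloadProto eventProto,
      CompressionCodec codec) throws IOException {
    byte[] data = eventProto.getData().getData().toByteArray();
    InputStream inStream = new ByteArrayInputStream(data);
    IFile.Reader reader = new IFile.Reader(inStream, data.length, codec, null,
        null, false, 0, -1);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    while (reader.nextRawKey(keyBuffer)) { // serialized key bytes
      reader.nextRawValue(valBuffer);      // matching serialized value bytes
      // deserialize here with the configured key/value Writables
    }
    reader.close();
  }

  private InlinePayloadReaderSketch() {}
}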
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077"); conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, outputContext.getWorkDirs()); conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, keyClass.getName()); conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, valClass.getName()); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/conf/TestUnorderedKVInputConfig.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/conf/TestUnorderedKVInputConfig.java index bb754429b4..d04fa6d298 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/conf/TestUnorderedKVInputConfig.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/conf/TestUnorderedKVInputConfig.java @@ -58,6 +58,7 @@ public void testSetters() { fromConf.set("test.conf.key.1", "confkey1"); fromConf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, 1111); fromConf.set("io.shouldExist", "io"); + fromConf.set("ssl.shouldExist", "ssl"); Map additionalConf = new HashMap(); additionalConf.put("test.key.2", "key2"); additionalConf.put(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, "3"); @@ -105,6 +106,7 @@ public void testSetters() { assertEquals("io", conf.get("io.shouldExist")); assertEquals("file", conf.get("file.shouldExist")); assertEquals("fs", conf.get("fs.shouldExist")); + assertEquals("ssl", conf.get("ssl.shouldExist")); assertNull(conf.get("test.conf.key.1")); assertNull(conf.get("test.key.1")); assertNull(conf.get("test.key.2")); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/conf/TestUnorderedPartitionedKVOutputConfig.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/conf/TestUnorderedPartitionedKVOutputConfig.java index 5e49b5118e..bff2868020 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/conf/TestUnorderedPartitionedKVOutputConfig.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/conf/TestUnorderedPartitionedKVOutputConfig.java @@ -82,6 +82,13 @@ public void testSetters() { .setAvailableBufferSize(1111) .setAdditionalConfiguration("fs.shouldExist", "fs") .setAdditionalConfiguration("test.key.1", "key1") + .setAdditionalConfiguration(TezRuntimeConfiguration + .TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED, "true") + .setAdditionalConfiguration(TezRuntimeConfiguration + .TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE, "5120") + .setAdditionalConfiguration(TezRuntimeConfiguration + .TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_SUPPORT_IN_MEM_FILE, + "false") .setAdditionalConfiguration(TezRuntimeConfiguration .TEZ_RUNTIME_PIPELINED_SHUFFLE_ENABLED, "true") .setAdditionalConfiguration(TezRuntimeConfiguration @@ -121,6 +128,12 @@ public void testSetters() { TezRuntimeConfiguration.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT)); assertEquals(2222, conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_MAX_PER_BUFFER_SIZE_BYTES, 0)); + assertEquals(true, + conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_EMPTY_PARTITION_INFO_VIA_EVENTS_ENABLED, false)); + assertEquals(5120, + conf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_MAX_SIZE, 512)); + assertEquals(false, + conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_SUPPORT_IN_MEM_FILE, true)); assertEquals("io", conf.get("io.shouldExist")); assertEquals("file", conf.get("file.shouldExist")); assertEquals("fs", conf.get("fs.shouldExist")); diff --git 
a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java index d4be80211a..c1cdf7cfa0 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/input/TestOrderedGroupedKVInput.java @@ -14,7 +14,8 @@ package org.apache.tez.runtime.library.input; -import static org.mockito.Matchers.any; +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; @@ -57,6 +58,34 @@ public void testInterruptWhileAwaitingInput() throws IOException, TezException { } + @Test + public void testMergeConfig() throws IOException, TezException { + Configuration baseConf = new Configuration(false); + baseConf.set("base-key", "base-value"); + + Configuration payloadConf = new Configuration(false); + payloadConf.set("local-key", "local-value"); + + InputContext inputContext = mock(InputContext.class); + + UserPayload payLoad = TezUtils.createUserPayloadFromConf(payloadConf); + String[] workingDirs = new String[]{"workDir1"}; + TezCounters counters = new TezCounters(); + + + doReturn(payLoad).when(inputContext).getUserPayload(); + doReturn(workingDirs).when(inputContext).getWorkDirs(); + doReturn(counters).when(inputContext).getCounters(); + doReturn(baseConf).when(inputContext).getContainerConfiguration(); + + OrderedGroupedKVInput input = new OrderedGroupedKVInput(inputContext, 1); + input.initialize(); + + Configuration mergedConf = input.conf; + assertEquals("base-value", mergedConf.get("base-key")); + assertEquals("local-value", mergedConf.get("local-key")); + } + private InputContext createMockInputContext() throws IOException { InputContext inputContext = mock(InputContext.class); @@ -70,6 +99,7 @@ private InputContext createMockInputContext() throws IOException { doReturn(workingDirs).when(inputContext).getWorkDirs(); doReturn(200 * 1024 * 1024l).when(inputContext).getTotalMemoryAvailableToTask(); doReturn(counters).when(inputContext).getCounters(); + doReturn(new Configuration(false)).when(inputContext).getContainerConfiguration(); doAnswer(new Answer() { @Override diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/OutputTestHelpers.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/OutputTestHelpers.java index 573d53e7b1..73a55d942f 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/OutputTestHelpers.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/OutputTestHelpers.java @@ -14,8 +14,8 @@ package org.apache.tez.runtime.library.output; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyLong; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; @@ -28,7 +28,6 @@ import org.apache.tez.common.counters.TezCounters; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.UserPayload; -import org.apache.tez.runtime.api.MemoryUpdateCallback; import org.apache.tez.runtime.api.OutputContext; import org.apache.tez.runtime.api.OutputStatisticsReporter; import 
org.apache.tez.runtime.api.impl.ExecutionContextImpl; @@ -36,7 +35,10 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; -class OutputTestHelpers { +final class OutputTestHelpers { + + private OutputTestHelpers() {} + static OutputContext createOutputContext() throws IOException { OutputContext outputContext = mock(OutputContext.class); Configuration conf = new TezConfiguration(); @@ -48,28 +50,32 @@ static OutputContext createOutputContext() throws IOException { doReturn("destinationVertex").when(outputContext).getDestinationVertexName(); doReturn(payLoad).when(outputContext).getUserPayload(); doReturn(workingDirs).when(outputContext).getWorkDirs(); - doReturn(200 * 1024 * 1024l).when(outputContext).getTotalMemoryAvailableToTask(); + doReturn(200 * 1024 * 1024L).when(outputContext).getTotalMemoryAvailableToTask(); doReturn(counters).when(outputContext).getCounters(); doReturn(statsReporter).when(outputContext).getStatisticsReporter(); + doReturn(new Configuration(false)).when(outputContext).getContainerConfiguration(); return outputContext; } - static OutputContext createOutputContext(Configuration conf, Path workingDir) throws IOException { + static OutputContext createOutputContext(Configuration conf, Configuration userPayloadConf, Path workingDir) + throws IOException { OutputContext ctx = mock(OutputContext.class); doAnswer(new Answer() { - @Override public Void answer(InvocationOnMock invocation) throws Throwable { + @Override public Void answer(InvocationOnMock invocation) { long requestedSize = (Long) invocation.getArguments()[0]; MemoryUpdateCallbackHandler callback = (MemoryUpdateCallbackHandler) invocation .getArguments()[1]; callback.memoryAssigned(requestedSize); return null; } - }).when(ctx).requestInitialMemory(anyLong(), any(MemoryUpdateCallback.class)); - doReturn(TezUtils.createUserPayloadFromConf(conf)).when(ctx).getUserPayload(); + }).when(ctx).requestInitialMemory(anyLong(), any()); + doReturn(conf).when(ctx).getContainerConfiguration(); + doReturn(TezUtils.createUserPayloadFromConf(userPayloadConf)).when(ctx).getUserPayload(); + doReturn("taskVertex").when(ctx).getTaskVertexName(); doReturn("destinationVertex").when(ctx).getDestinationVertexName(); doReturn("UUID").when(ctx).getUniqueIdentifier(); doReturn(new String[] { workingDir.toString() }).when(ctx).getWorkDirs(); - doReturn(200 * 1024 * 1024l).when(ctx).getTotalMemoryAvailableToTask(); + doReturn(200 * 1024 * 1024L).when(ctx).getTotalMemoryAvailableToTask(); doReturn(new TezCounters()).when(ctx).getCounters(); OutputStatisticsReporter statsReporter = mock(OutputStatisticsReporter.class); doReturn(statsReporter).when(ctx).getStatisticsReporter(); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOnFileSortedOutput.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOnFileSortedOutput.java index 77620258dc..47f841c3d6 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOnFileSortedOutput.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOnFileSortedOutput.java @@ -32,7 +32,6 @@ import org.apache.tez.dag.api.UserPayload; import org.apache.tez.runtime.api.ExecutionContext; import org.apache.tez.runtime.api.Event; -import org.apache.tez.runtime.api.MemoryUpdateCallback; import org.apache.tez.runtime.api.OutputContext; import org.apache.tez.runtime.api.OutputStatisticsReporter; import 
org.apache.tez.runtime.api.events.CompositeDataMovementEvent; @@ -44,7 +43,6 @@ import org.apache.tez.runtime.library.common.sort.impl.dflt.DefaultSorter; import org.apache.tez.runtime.library.conf.OrderedPartitionedKVOutputConfig.SorterImpl; import org.apache.tez.runtime.library.partitioner.HashPartitioner; -import org.apache.tez.runtime.library.common.shuffle.ShuffleUtils; import org.apache.tez.runtime.library.shuffle.impl.ShuffleUserPayloads; import org.junit.After; import org.junit.Assert; @@ -69,8 +67,8 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyLong; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; @@ -378,6 +376,7 @@ public void testAllEmptyPartition() throws Exception { private OutputContext createTezOutputContext() throws IOException { String[] workingDirs = { workingDir.toString() }; + Configuration localConf = new Configuration(false); UserPayload payLoad = TezUtils.createUserPayloadFromConf(conf); DataOutputBuffer serviceProviderMetaData = new DataOutputBuffer(); serviceProviderMetaData.writeInt(PORT); @@ -400,11 +399,13 @@ private OutputContext createTezOutputContext() throws IOException { OutputContext context = mock(OutputContext.class); + doReturn(localConf).when(context).getContainerConfiguration(); doReturn(counters).when(context).getCounters(); doReturn(workingDirs).when(context).getWorkDirs(); doReturn(payLoad).when(context).getUserPayload(); doReturn(5 * 1024 * 1024l).when(context).getTotalMemoryAvailableToTask(); doReturn(UniqueID).when(context).getUniqueIdentifier(); + doReturn("v0").when(context).getTaskVertexName(); doReturn("v1").when(context).getDestinationVertexName(); doReturn(ByteBuffer.wrap(serviceProviderMetaData.getData())).when(context) .getServiceProviderMetaData @@ -418,7 +419,7 @@ private OutputContext createTezOutputContext() throws IOException { callback.memoryAssigned(requestedSize); return null; } - }).when(context).requestInitialMemory(anyLong(), any(MemoryUpdateCallback.class)); + }).when(context).requestInitialMemory(anyLong(), any()); ExecutionContext ExecutionContext = mock(ExecutionContext.class); doReturn(HOST).when(ExecutionContext).getHostName(); doReturn(reporter).when(context).getStatisticsReporter(); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOnFileUnorderedKVOutput.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOnFileUnorderedKVOutput.java index 393ac2e71d..cdb246d069 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOnFileUnorderedKVOutput.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOnFileUnorderedKVOutput.java @@ -18,8 +18,8 @@ package org.apache.tez.runtime.library.output; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyLong; import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; @@ -61,7 +61,6 @@ import org.apache.tez.dag.records.TezVertexID; import org.apache.tez.runtime.LogicalIOProcessorRuntimeTask; import org.apache.tez.runtime.api.Event; -import 
org.apache.tez.runtime.api.MemoryUpdateCallback; import org.apache.tez.runtime.api.OutputContext; import org.apache.tez.runtime.api.events.CompositeDataMovementEvent; import org.apache.tez.runtime.api.impl.ExecutionContextImpl; @@ -128,7 +127,7 @@ public void testGeneratedDataMovementEvent() throws Exception { conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName()); TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf); - OutputContext outputContext = createOutputContext(conf, sharedExecutor); + OutputContext outputContext = createOutputContext(conf, new Configuration(false), sharedExecutor); UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1); @@ -161,6 +160,26 @@ public void testGeneratedDataMovementEvent() throws Exception { sharedExecutor.shutdownNow(); } + @Test + public void testMergeConfig() throws Exception { + Configuration baseConf = new Configuration(false); + baseConf.set("local-key", "local-value"); + + Configuration payloadConf = new Configuration(false); + payloadConf.set("base-key", "base-value"); + + TezSharedExecutor sharedExecutor = new TezSharedExecutor(baseConf); + OutputContext outputContext = createOutputContext(payloadConf, baseConf, sharedExecutor); + + UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1); + + kvOutput.initialize(); + + Configuration mergedConf = kvOutput.conf; + assertEquals("local-value", mergedConf.get("local-key")); + assertEquals("base-value", mergedConf.get("base-key")); + } + @Test(timeout = 30000) @SuppressWarnings("unchecked") public void testWithPipelinedShuffle() throws Exception { @@ -173,7 +192,7 @@ public void testWithPipelinedShuffle() throws Exception { conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB, 1); TezSharedExecutor sharedExecutor = new TezSharedExecutor(conf); - OutputContext outputContext = createOutputContext(conf, sharedExecutor); + OutputContext outputContext = createOutputContext(conf, new Configuration(false), sharedExecutor); UnorderedKVOutput kvOutput = new UnorderedKVOutput(outputContext, 1); @@ -211,8 +230,8 @@ public void testWithPipelinedShuffle() throws Exception { sharedExecutor.shutdownNow(); } - private OutputContext createOutputContext(Configuration conf, TezSharedExecutor sharedExecutor) - throws IOException { + private OutputContext createOutputContext(Configuration payloadConf, Configuration baseConf, + TezSharedExecutor sharedExecutor) throws IOException { int appAttemptNumber = 1; TezUmbilical tezUmbilical = mock(TezUmbilical.class); String dagName = "currentDAG"; @@ -222,7 +241,7 @@ private OutputContext createOutputContext(Configuration conf, TezSharedExecutor TezVertexID vertexID = TezVertexID.getInstance(dagID, 1); TezTaskID taskID = TezTaskID.getInstance(vertexID, 1); TezTaskAttemptID taskAttemptID = TezTaskAttemptID.getInstance(taskID, 1); - UserPayload userPayload = TezUtils.createUserPayloadFromConf(conf); + UserPayload userPayload = TezUtils.createUserPayloadFromConf(payloadConf); TaskSpec mockSpec = mock(TaskSpec.class); when(mockSpec.getInputs()).thenReturn(Collections.singletonList(mock(InputSpec.class))); @@ -237,17 +256,17 @@ private OutputContext createOutputContext(Configuration conf, TezSharedExecutor ByteBuffer bb = ByteBuffer.allocate(4); bb.putInt(shufflePort); bb.position(0); - AuxiliaryServiceHelper.setServiceDataIntoEnv(conf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, + 
AuxiliaryServiceHelper.setServiceDataIntoEnv(payloadConf.get(TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID, TezConfiguration.TEZ_AM_SHUFFLE_AUXILIARY_SERVICE_ID_DEFAULT), bb, auxEnv); OutputDescriptor outputDescriptor = mock(OutputDescriptor.class); when(outputDescriptor.getClassName()).thenReturn("OutputDescriptor"); - OutputContext realOutputContext = new TezOutputContextImpl(conf, new String[] {workDir.toString()}, + OutputContext realOutputContext = new TezOutputContextImpl(baseConf, new String[] {workDir.toString()}, appAttemptNumber, tezUmbilical, dagName, taskVertexName, destinationVertexName, -1, taskAttemptID, 0, userPayload, runtimeTask, - null, auxEnv, new MemoryDistributor(1, 1, conf) , outputDescriptor, null, + null, auxEnv, new MemoryDistributor(1, 1, payloadConf), outputDescriptor, null, new ExecutionContextImpl("localhost"), 2048, new TezSharedExecutor(defaultConf)); verify(runtimeTask, times(1)).addAndGetTezCounter(destinationVertexName); verify(runtimeTask, times(1)).getTaskStatistics(); @@ -262,7 +281,7 @@ null, auxEnv, new MemoryDistributor(1, 1, conf) , outputDescriptor, null, callback.memoryAssigned(requestedSize); return null; } - }).when(outputContext).requestInitialMemory(anyLong(), any(MemoryUpdateCallback.class)); + }).when(outputContext).requestInitialMemory(anyLong(), any()); return outputContext; } diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOrderedPartitionedKVOutput2.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOrderedPartitionedKVOutput2.java index f226b7c385..29ce890309 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOrderedPartitionedKVOutput2.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestOrderedPartitionedKVOutput2.java @@ -69,7 +69,7 @@ public void cleanup() throws IOException { @Test(timeout = 5000) public void testNonStartedOutput() throws IOException { - OutputContext outputContext = OutputTestHelpers.createOutputContext(conf, workingDir); + OutputContext outputContext = OutputTestHelpers.createOutputContext(conf, conf, workingDir); int numPartitions = 10; OrderedPartitionedKVOutput output = new OrderedPartitionedKVOutput(outputContext, numPartitions); output.initialize(); @@ -94,9 +94,24 @@ public void testNonStartedOutput() throws IOException { } } + @Test(timeout = 5000) + public void testConfigMerge() throws IOException { + Configuration localConf = new Configuration(conf); + localConf.set("config-from-local", "config-from-local-value"); + Configuration payload = new Configuration(false); + payload.set("config-from-payload", "config-from-payload-value"); + OutputContext outputContext = OutputTestHelpers.createOutputContext(localConf, payload, workingDir); + int numPartitions = 10; + OrderedPartitionedKVOutput output = new OrderedPartitionedKVOutput(outputContext, numPartitions); + output.initialize(); + Configuration configAfterMerge = output.conf; + assertEquals("config-from-local-value", configAfterMerge.get("config-from-local")); + assertEquals("config-from-payload-value", configAfterMerge.get("config-from-payload")); + } + @Test(timeout = 10000) public void testClose() throws Exception { - OutputContext outputContext = OutputTestHelpers.createOutputContext(conf, workingDir); + OutputContext outputContext = OutputTestHelpers.createOutputContext(conf, conf, workingDir); int numPartitions = 10; OrderedPartitionedKVOutput output = new OrderedPartitionedKVOutput(outputContext, 
numPartitions); output.initialize(); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestUnorderedKVOutput2.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestUnorderedKVOutput2.java index 792b03f572..a52788e716 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestUnorderedKVOutput2.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestUnorderedKVOutput2.java @@ -93,9 +93,24 @@ public void testNonStartedOutput() throws Exception { } } + @Test(timeout = 5000) + public void testConfigMerge() throws Exception { + Configuration localConf = new Configuration(conf); + localConf.set("config-from-local", "config-from-local-value"); + Configuration payload = new Configuration(false); + payload.set("config-from-payload", "config-from-payload-value"); + OutputContext outputContext = OutputTestHelpers.createOutputContext(localConf, payload, workingDir); + int numPartitions = 10; + UnorderedKVOutput output = new UnorderedKVOutput(outputContext, numPartitions); + output.initialize(); + Configuration configAfterMerge = output.conf; + assertEquals("config-from-local-value", configAfterMerge.get("config-from-local")); + assertEquals("config-from-payload-value", configAfterMerge.get("config-from-payload")); + } + @Test(timeout = 10000) public void testClose() throws Exception { - OutputContext outputContext = OutputTestHelpers.createOutputContext(conf, workingDir); + OutputContext outputContext = OutputTestHelpers.createOutputContext(conf, conf, workingDir); int numPartitions = 1; UnorderedKVOutput output = new UnorderedKVOutput(outputContext, numPartitions); output.initialize(); diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestUnorderedPartitionedKVOutput2.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestUnorderedPartitionedKVOutput2.java index eec4bf59e3..52e06300dd 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestUnorderedPartitionedKVOutput2.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/output/TestUnorderedPartitionedKVOutput2.java @@ -22,6 +22,8 @@ import java.util.List; import com.google.protobuf.ByteString; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.tez.common.TezCommonUtils; import org.apache.tez.common.TezUtilsInternal; import org.apache.tez.runtime.api.Event; @@ -59,4 +61,21 @@ public void testNonStartedOutput() throws Exception { assertTrue(emptyPartionsBitSet.get(i)); } } + + @Test + public void testConfigMerge() throws Exception { + Configuration userPayloadConf = new Configuration(false); + Configuration baseConf = new Configuration(false); + + userPayloadConf.set("local-key", "local-value"); + baseConf.set("base-key", "base-value"); + OutputContext outputContext = OutputTestHelpers.createOutputContext( + userPayloadConf, baseConf, new Path("/")); + UnorderedPartitionedKVOutput output = + new UnorderedPartitionedKVOutput(outputContext, 1); + output.initialize(); + Configuration mergedConf = output.conf; + assertEquals("base-value", mergedConf.get("base-key")); + assertEquals("local-value", mergedConf.get("local-key")); + } } diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/testutils/KVDataGen.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/testutils/KVDataGen.java index 318cfc5020..62fd2bf174 100644 --- 
a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/testutils/KVDataGen.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/testutils/KVDataGen.java @@ -25,26 +25,38 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -public class KVDataGen { +public final class KVDataGen { static Random rnd = new Random(); + private KVDataGen() {} + public static List generateTestData(boolean repeatKeys) { return generateTestData(true, rnd.nextInt(100)); } /** - * Generate key value pair + * Generate key value pair. * * @param sorted whether data should be sorted by key * @param repeatCount number of keys to be repeated - * @return */ public static List generateTestData(boolean sorted, int repeatCount) { - List data = new LinkedList(); + return generateTestDataOfKeySize(sorted, 5, repeatCount); + } + + /** + * Generate key value pairs for a given number of keys. + * + * @param sorted whether data should be sorted by key + * @param keys number of keys + * @param repeatCount number of keys to be repeated + */ + public static List generateTestDataOfKeySize(boolean sorted, int keys, int repeatCount) { + List data = new LinkedList<>(); Random rnd = new Random(); KVPair kvp = null; - for (int i = 0; i < 5; i++) { + for (int i = 0; i < keys; i++) { String keyStr = (sorted) ? ("key" + i) : (rnd.nextLong() + "key" + i); Text key = new Text(keyStr); IntWritable value = new IntWritable(i + repeatCount); @@ -52,7 +64,7 @@ public static List generateTestData(boolean sorted, int repeatCount) { data.add(kvp); if ((repeatCount > 0) && (i % 2 == 0)) { // Repeat this key for random number of times int count = rnd.nextInt(5); - for(int j = 0; j < count; j++) { + for (int j = 0; j < count; j++) { repeatCount++; value.set(i + rnd.nextInt()); kvp = new KVPair(key, value); @@ -60,7 +72,7 @@ public static List generateTestData(boolean sorted, int repeatCount) { } } } - //If we need to generated repeated keys, try to add some repeated keys to the end of file also. + // If we need to generate repeated keys, try to add some repeated keys to the end of the file as well. if (repeatCount > 0 && kvp != null) { data.add(kvp); data.add(kvp); @@ -69,8 +81,8 @@ public static List generateTestData(boolean sorted, int repeatCount) { } public static class KVPair { - private Text key; - private IntWritable value; + private final Text key; + private final IntWritable value; public KVPair(Text key, IntWritable value) { this.key = key; diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/testutils/RandomTextGenerator.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/testutils/RandomTextGenerator.java index c1a05d9932..d6d15720a3 100644 --- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/testutils/RandomTextGenerator.java +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/testutils/RandomTextGenerator.java @@ -22,12 +22,14 @@ import java.util.Random; -public class RandomTextGenerator { +public final class RandomTextGenerator { static int minWordsInKey = 10; static int wordsInKeyRange = 100; static final Random random = new Random(); + private RandomTextGenerator() {} + public static Text generateSentence() { int noWordsKey = minWordsInKey + (wordsInKeyRange != 0 ?
random.nextInt(wordsInKeyRange) : 0); @@ -35,10 +37,10 @@ public static Text generateSentence() { } public static Text generateSentence(int noWords) { - StringBuffer sentence = new StringBuffer(); + StringBuilder sentence = new StringBuilder(); String space = " "; for (int i = 0; i < noWords; ++i) { - sentence.append(words[random.nextInt(words.length)]); + sentence.append(WORDS[random.nextInt(WORDS.length)]); sentence.append(space); } return new Text(sentence.toString()); @@ -49,7 +51,7 @@ public static Text generateSentence(int noWords) { *

    * A random list of 100 words from /usr/share/dict/words */ - private static String[] words = { + private static final String[] WORDS = { "diurnalness", "Homoiousian", "spiranthic", "tetragynian", "silverhead", "ungreat", diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/testutils/RuntimeTestUtils.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/testutils/RuntimeTestUtils.java new file mode 100644 index 0000000000..0885178ee5 --- /dev/null +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/testutils/RuntimeTestUtils.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.runtime.library.testutils; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleHeader; + +public final class RuntimeTestUtils { + + private RuntimeTestUtils() { + } + + public static DataInputStream shuffleHeaderToDataInput(ShuffleHeader header) throws IOException { + ByteArrayOutputStream byteOutput = new ByteArrayOutputStream(1000); + DataOutputStream output = new DataOutputStream(byteOutput); + header.write(output); + + InputStream inputStream = new ByteArrayInputStream(byteOutput.toByteArray()); + DataInputStream input = new DataInputStream(inputStream); + + return input; + } +} diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/utils/TestCodecUtils.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/utils/TestCodecUtils.java new file mode 100644 index 0000000000..5141598200 --- /dev/null +++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/utils/TestCodecUtils.java @@ -0,0 +1,225 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tez.runtime.library.utils; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.reflect.Field; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.io.compress.BZip2Codec; +import org.apache.hadoop.io.compress.CodecPool; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.io.compress.DecompressorStream; +import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.io.compress.Lz4Codec; +import org.apache.hadoop.io.compress.SnappyCodec; +import org.apache.hadoop.io.compress.ZStandardCodec; +import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; +import org.apache.tez.runtime.library.common.shuffle.orderedgrouped.DummyCompressionCodec; +import org.apache.tez.runtime.library.common.sort.impl.IFileInputStream; +import org.junit.Assert; +import org.junit.Test; + +import static org.mockito.Mockito.mock; + +public class TestCodecUtils { + + @Test + public void testConcurrentDecompressorCreationWithModifiedBuffersize() throws Exception { + testConcurrentDecompressorCreationWithModifiedBuffersizeOnCodec(new DefaultCodec()); + } + + private void testConcurrentDecompressorCreationWithModifiedBuffersizeOnCodec( + CompressionCodec codec) throws InterruptedException, ExecutionException { + int modifiedBufferSize = 1000; + int numberOfThreads = 1000; + + ExecutorService service = Executors.newFixedThreadPool(numberOfThreads); + + Configuration conf = new Configuration(); + conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, true); + ((Configurable) codec).setConf(conf); + + Future[] futures = new Future[numberOfThreads]; + final CountDownLatch latch = new CountDownLatch(1); + + for (int i = 0; i < numberOfThreads; i++) { + futures[i] = service.submit(() -> { + try { + waitForLatch(latch); + + Decompressor decompressor = CodecUtils.getDecompressor(codec); + DecompressorStream stream = + (DecompressorStream) CodecUtils.getDecompressedInputStreamWithBufferSize(codec, + mock(IFileInputStream.class), decompressor, modifiedBufferSize); + + Assert.assertEquals("stream buffer size is incorrect", modifiedBufferSize, + getBufferSize(stream)); + + CodecPool.returnDecompressor(decompressor); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + latch.countDown(); + + for (Future f : futures) { + f.get(); + } + } + + @Test + public void testConcurrentCompressorDecompressorCreation() throws Exception { + testConcurrentCompressorDecompressorCreationOnCodec(new DefaultCodec()); + } + + private void testConcurrentCompressorDecompressorCreationOnCodec(CompressionCodec codec) + throws IOException, InterruptedException, ExecutionException { + int modifiedBufferSize = 1000; + int numberOfThreads = 1000; + + ExecutorService service = Executors.newFixedThreadPool(numberOfThreads); + + Configuration 
conf = new Configuration(); + conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, true); + ((Configurable) codec).setConf(conf); + + Future[] futures = new Future[numberOfThreads]; + final CountDownLatch latch = new CountDownLatch(1); + + for (int i = 0; i < numberOfThreads; i++) { + // let's "randomly" choose from scenarios and test them concurrently + // 1. getDecompressedInputStreamWithBufferSize + if (i % 3 == 0) { + futures[i] = service.submit(() -> { + try { + waitForLatch(latch); + + Decompressor decompressor = CodecUtils.getDecompressor(codec); + CompressionInputStream stream = + (CompressionInputStream) CodecUtils.getDecompressedInputStreamWithBufferSize(codec, + mock(IFileInputStream.class), decompressor, modifiedBufferSize); + + Assert.assertEquals("stream buffer size is incorrect", modifiedBufferSize, + getBufferSize(stream)); + + CodecPool.returnDecompressor(decompressor); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + // 2. getCompressor + } else if (i % 3 == 1) { + futures[i] = service.submit(() -> { + try { + waitForLatch(latch); + + Compressor compressor = CodecUtils.getCompressor(codec); + CompressionOutputStream stream = + CodecUtils.createOutputStream(codec, mock(OutputStream.class), compressor); + + Assert.assertEquals("stream buffer size is incorrect", + CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT, getBufferSize(stream)); + + CodecPool.returnCompressor(compressor); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + // 3. getDecompressor + } else if (i % 3 == 2) { + futures[i] = service.submit(() -> { + try { + waitForLatch(latch); + + Decompressor decompressor = CodecUtils.getDecompressor(codec); + CompressionInputStream stream = + CodecUtils.createInputStream(codec, mock(InputStream.class), decompressor); + + Assert.assertEquals("stream buffer size is incorrect", + CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT, getBufferSize(stream)); + + CodecPool.returnDecompressor(decompressor); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } + } + latch.countDown(); + + for (Future f : futures) { + f.get(); + } + } + + @Test + public void testDefaultBufferSize() { + Configuration conf = new Configuration(); // config with no buffersize set + + Assert.assertEquals(CodecUtils.DEFAULT_BUFFER_SIZE, + CodecUtils.getDefaultBufferSize(conf, new DummyCompressionCodec())); + Assert.assertEquals(CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT, + CodecUtils.getDefaultBufferSize(conf, new DefaultCodec())); + Assert.assertEquals(CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT, + CodecUtils.getDefaultBufferSize(conf, new BZip2Codec())); + Assert.assertEquals(CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT, + CodecUtils.getDefaultBufferSize(conf, new GzipCodec())); + Assert.assertEquals(CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT, + CodecUtils.getDefaultBufferSize(conf, new SnappyCodec())); + Assert.assertEquals(CommonConfigurationKeys.IO_COMPRESSION_CODEC_ZSTD_BUFFER_SIZE_DEFAULT, + CodecUtils.getDefaultBufferSize(conf, new ZStandardCodec())); + Assert.assertEquals(CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT, + CodecUtils.getDefaultBufferSize(conf, new Lz4Codec())); + } + + private void waitForLatch(CountDownLatch latch) { + try { + latch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + private int getBufferSize(Object stream) { + try { + Field field = 
stream.getClass().getDeclaredField("buffer"); + field.setAccessible(true); + byte[] buffer = (byte[]) field.get(stream); + return buffer.length; + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/tez-runtime-library/src/test/resources/META-INF/LICENSE.txt b/tez-runtime-library/src/test/resources/META-INF/LICENSE similarity index 100% rename from tez-runtime-library/src/test/resources/META-INF/LICENSE.txt rename to tez-runtime-library/src/test/resources/META-INF/LICENSE diff --git a/tez-runtime-library/src/test/resources/META-INF/NOTICE b/tez-runtime-library/src/test/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-runtime-library/src/test/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-runtime-library/src/test/resources/META-INF/NOTICE.txt b/tez-runtime-library/src/test/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-runtime-library/src/test/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-runtime-library/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/tez-runtime-library/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker new file mode 100644 index 0000000000..a258d79ad3 --- /dev/null +++ b/tez-runtime-library/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker @@ -0,0 +1,13 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
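+# Mockito discovers this file on the test classpath under the
+# mockito-extensions resource directory; the "mock-maker-inline" marker below
+# switches the tests to Mockito's inline mock maker, which can also mock
+# final classes and final methods (requires mockito-core 2.x or newer, as
+# added to the poms in this change).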
+ +mock-maker-inline diff --git a/tez-tests/pom.xml b/tez-tests/pom.xml index cc6da2954a..2bf8ca38ee 100644 --- a/tez-tests/pom.xml +++ b/tez-tests/pom.xml @@ -20,10 +20,14 @@ <groupId>org.apache.tez</groupId> <artifactId>tez</artifactId> - <version>0.9.1-SNAPSHOT</version> + <version>0.10.5-SNAPSHOT</version> <artifactId>tez-tests</artifactId> + + <properties> + <test.log.dir>${project.build.directory}/logs</test.log.dir> + </properties> + <dependencies> <dependency> <groupId>org.apache.tez</groupId> @@ -122,13 +126,45 @@ <groupId>org.bouncycastle</groupId> - <artifactId>bcprov-jdk16</artifactId> + <artifactId>bcprov-jdk18on</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-core</artifactId> <scope>test</scope> </dependency> + <plugin> + <artifactId>maven-antrun-plugin</artifactId> + <executions> + <execution> + <id>generate-sources</id> + <phase>generate-sources</phase> + <configuration> + <target> + <mkdir dir="${test.log.dir}"/> + </target> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <configuration> + <systemPropertyVariables> + <test.log.dir>${test.log.dir}</test.log.dir> + </systemPropertyVariables> + </configuration> + </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-jar-plugin</artifactId> diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/BroadcastAndOneToOneExample.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/BroadcastAndOneToOneExample.java index 9dd9b59cbb..5c99f3efbf 100644 --- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/BroadcastAndOneToOneExample.java +++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/BroadcastAndOneToOneExample.java @@ -54,7 +54,7 @@ import org.apache.tez.runtime.library.output.UnorderedKVOutput; import org.apache.tez.runtime.library.processor.SimpleProcessor; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; public class BroadcastAndOneToOneExample extends Configured implements Tool { public static class InputProcessor extends SimpleProcessor { diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/BroadcastLoadGen.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/BroadcastLoadGen.java index d9b89c1cf5..03bb0519a0 100644 --- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/BroadcastLoadGen.java +++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/BroadcastLoadGen.java @@ -45,7 +45,7 @@ import org.apache.tez.runtime.library.output.UnorderedKVOutput; import org.apache.tez.runtime.library.processor.SimpleProcessor; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; public class BroadcastLoadGen extends TezExampleBase { diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/CartesianProduct.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/CartesianProduct.java index 6096f969a8..aea662ae04 100644 --- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/CartesianProduct.java +++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/CartesianProduct.java @@ -17,7 +17,7 @@ */ package org.apache.tez.mapreduce.examples; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/ExampleDriver.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/ExampleDriver.java index cdbdf13db5..cc22f6a4fa 100644 --- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/ExampleDriver.java +++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/ExampleDriver.java @@ -38,11 +38,13 @@ * A description of an example program based on its class and a * human-readable description.
*/ -public class ExampleDriver { +public final class ExampleDriver { private static final DecimalFormat formatter = new DecimalFormat("###.##%"); - public static void main(String argv[]){ + private ExampleDriver() {} + + public static void main(String[] argv){ int exitCode = -1; ProgramDriver pgd = new ProgramDriver(); try { @@ -110,9 +112,9 @@ public static void printDAGStatus(DAGClient dagClient, String[] vertexNames, DAGStatus dagStatus = dagClient.getDAGStatus( (displayDAGCounters ? opts : null)); Progress progress = dagStatus.getDAGProgress(); - double vProgressFloat = 0.0f; + double vProgressFloat; if (progress != null) { - System.out.println(""); + System.out.println(); System.out.println("DAG: State: " + dagStatus.getState() + " Progress: " diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/FilterLinesByWord.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/FilterLinesByWord.java index 36215a4b43..d1c4708aee 100644 --- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/FilterLinesByWord.java +++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/FilterLinesByWord.java @@ -84,13 +84,13 @@ public class FilterLinesByWord extends Configured implements Tool { private static Logger LOG = LoggerFactory.getLogger(FilterLinesByWord.class); public static final String FILTER_PARAM_NAME = "tez.runtime.examples.filterbyword.word"; - + private boolean exitOnCompletion = false; public FilterLinesByWord(boolean exitOnCompletion) { this.exitOnCompletion = exitOnCompletion; } - + private static void printUsage() { System.err.println("Usage filtelinesrbyword [-generateSplitsInClient true/]"); ToolRunner.printGenericCommandUsage(System.err); @@ -124,7 +124,11 @@ public int run(String[] args) throws Exception { String filterWord = otherArgs[2]; FileSystem fs = FileSystem.get(conf); - if (fs.exists(new Path(outputPath))) { + + Path outputPathAsPath = new Path(outputPath); + FileSystem outputFs = outputPathAsPath.getFileSystem(conf); + outputPathAsPath = outputFs.makeQualified(outputPathAsPath); + if (outputFs.exists(outputPathAsPath)) { System.err.println("Output directory : " + outputPath + " already exists"); return 2; } @@ -158,7 +162,7 @@ public int run(String[] args) throws Exception { TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, credentials); - tezSession.start(); // Why do I need to start the TezSession. + tezSession.start(); // Why do I need to start the TezSession? Configuration stage1Conf = new JobConf(conf); stage1Conf.set(FILTER_PARAM_NAME, filterWord); @@ -244,9 +248,9 @@ public int run(String[] args) throws Exception { return -1; } } - + dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS)); - + } finally { fs.delete(stagingDir, true); tezSession.stop(); @@ -256,7 +260,7 @@ public int run(String[] args) throws Exception { LOG.info("Application completed. " + "FinalState=" + dagStatus.getState()); return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 
0 : 1; } - + public static void main(String[] args) throws Exception { FilterLinesByWord fl = new FilterLinesByWord(true); int status = ToolRunner.run(new Configuration(), fl, args); diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/FilterLinesByWordOneToOne.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/FilterLinesByWordOneToOne.java index 00205036f4..a79b68a852 100644 --- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/FilterLinesByWordOneToOne.java +++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/FilterLinesByWordOneToOne.java @@ -77,7 +77,7 @@ public class FilterLinesByWordOneToOne extends Configured implements Tool { public static final String FILTER_PARAM_NAME = "tez.runtime.examples.filterbyword.word"; private static void printUsage() { - System.err.println("Usage filterLinesByWordOneToOne " + System.err.println("Usage filterLinesByWordOneToOne " + " [-generateSplitsInClient true/]"); ToolRunner.printGenericCommandUsage(System.err); } @@ -112,7 +112,7 @@ public int run(String[] otherArgs) throws Exception { String inputPath = otherArgs[0]; String outputPath = otherArgs[1]; String filterWord = otherArgs[2]; - + Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); if (fs.exists(new Path(outputPath))) { @@ -148,7 +148,7 @@ public int run(String[] otherArgs) throws Exception { TezClient tezSession = TezClient.create("FilterLinesByWordSession", tezConf, commonLocalResources, null); - tezSession.start(); // Why do I need to start the TezSession. + tezSession.start(); // Why do I need to start the TezSession? Configuration stage1Conf = new JobConf(conf); stage1Conf.set(FILTER_PARAM_NAME, filterWord); diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/TestOrderedWordCount.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/TestOrderedWordCount.java index 51e4be18ae..1b87e11713 100644 --- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/TestOrderedWordCount.java +++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/TestOrderedWordCount.java @@ -18,7 +18,6 @@ package org.apache.tez.mapreduce.examples; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.EnumSet; @@ -441,8 +440,6 @@ public int run(String[] args) throws Exception { HadoopShim hadoopShim = new HadoopShimsLoader(tezConf).getHadoopShim(); TestOrderedWordCount instance = new TestOrderedWordCount(); - FileSystem fs = FileSystem.get(conf); - String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR + Long.toString(System.currentTimeMillis()); @@ -498,7 +495,10 @@ public int run(String[] args) throws Exception { String inputPath = inputPaths.get(dagIndex-1); String outputPath = outputPaths.get(dagIndex-1); - if (fs.exists(new Path(outputPath))) { + Path outputPathAsPath = new Path(outputPath); + FileSystem fs = outputPathAsPath.getFileSystem(conf); + outputPathAsPath = fs.makeQualified(outputPathAsPath); + if (fs.exists(outputPathAsPath)) { throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists"); } diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/UnionExample.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/UnionExample.java index 7688335d37..767691d34b 100644 --- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/UnionExample.java +++ 
b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/UnionExample.java @@ -64,7 +64,7 @@ import org.apache.tez.runtime.library.input.ConcatenatedMergedKeyValuesInput; import org.apache.tez.runtime.library.partitioner.HashPartitioner; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Maps; public class UnionExample { @@ -262,7 +262,10 @@ public boolean run(String inputPath, String outputPath, Configuration conf) thro DAGClient dagClient = null; try { - if (fs.exists(new Path(outputPath))) { + Path outputPathAsPath = new Path(outputPath); + FileSystem outputFs = outputPathAsPath.getFileSystem(tezConf); + outputPathAsPath = outputFs.makeQualified(outputPathAsPath); + if (outputFs.exists(outputPathAsPath)) { throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists"); } diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/helpers/SplitsInClientOptionParser.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/helpers/SplitsInClientOptionParser.java index cabc3c4ffd..21419e7179 100644 --- a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/helpers/SplitsInClientOptionParser.java +++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/helpers/SplitsInClientOptionParser.java @@ -26,7 +26,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; public class SplitsInClientOptionParser { @@ -48,7 +48,7 @@ public String[] getRemainingArgs() { @SuppressWarnings("static-access") public boolean parse(String[] args, boolean defaultVal) throws ParseException { Preconditions.checkState(parsed == false, - "Craete a new instance for different option sets"); + "Create a new instance for different option sets"); parsed = true; Options opts = new Options(); Option opt = OptionBuilder diff --git a/tez-tests/src/main/javadoc/resources/META-INF/LICENSE.txt b/tez-tests/src/main/javadoc/resources/META-INF/LICENSE similarity index 100% rename from tez-tests/src/main/javadoc/resources/META-INF/LICENSE.txt rename to tez-tests/src/main/javadoc/resources/META-INF/LICENSE diff --git a/tez-tests/src/main/javadoc/resources/META-INF/NOTICE b/tez-tests/src/main/javadoc/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-tests/src/main/javadoc/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-tests/src/main/javadoc/resources/META-INF/NOTICE.txt b/tez-tests/src/main/javadoc/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-tests/src/main/javadoc/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
- diff --git a/tez-tests/src/main/resources/META-INF/LICENSE.txt b/tez-tests/src/main/resources/META-INF/LICENSE similarity index 100% rename from tez-tests/src/main/resources/META-INF/LICENSE.txt rename to tez-tests/src/main/resources/META-INF/LICENSE diff --git a/tez-tests/src/main/resources/META-INF/NOTICE b/tez-tests/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-tests/src/main/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-tests/src/main/resources/META-INF/NOTICE.txt b/tez-tests/src/main/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-tests/src/main/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-tests/src/test/java/org/apache/tez/mapreduce/TestMRRJobsDAGApi.java b/tez-tests/src/test/java/org/apache/tez/mapreduce/TestMRRJobsDAGApi.java index 5ce9c5daf9..339c46780e 100644 --- a/tez-tests/src/test/java/org/apache/tez/mapreduce/TestMRRJobsDAGApi.java +++ b/tez-tests/src/test/java/org/apache/tez/mapreduce/TestMRRJobsDAGApi.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; @@ -71,6 +72,7 @@ import org.apache.tez.client.TezClient; import org.apache.tez.client.TezAppMasterStatus; import org.apache.tez.common.ReflectionUtils; +import org.apache.tez.common.TezClassLoader; import org.apache.tez.common.TezUtils; import org.apache.tez.common.counters.FileSystemCounter; import org.apache.tez.common.counters.TaskCounter; @@ -129,7 +131,7 @@ import org.junit.BeforeClass; import org.junit.Test; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Sets; public class TestMRRJobsDAGApi { @@ -187,7 +189,7 @@ public static void tearDown() { public void testSleepJob() throws TezException, IOException, InterruptedException { SleepProcessorConfig spConf = new SleepProcessorConfig(1); - DAG dag = DAG.create("TezSleepProcessor"); + DAG dag = DAG.create("DAG-testSleepJob"); Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create( SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1, Resource.newInstance(1024, 1)); @@ -210,13 +212,18 @@ public void testSleepJob() throws TezException, IOException, InterruptedExceptio + dagStatus.getState()); Thread.sleep(500l); dagStatus = dagClient.getDAGStatus(null); + assertTrue("Memory used by AM is supposed to be 0 if not requested", dagStatus.getMemoryUsedByAM() == 0); + assertTrue("Memory used by tasks is supposed to be 0 if not requested", dagStatus.getMemoryUsedByTasks() == 0); } - dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS)); + dagStatus = dagClient.getDAGStatus(Sets.newHashSet(StatusGetOpts.GET_COUNTERS, StatusGetOpts.GET_MEMORY_USAGE)); assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState()); assertNotNull(dagStatus.getDAGCounters()); assertNotNull(dagStatus.getDAGCounters().getGroup(FileSystemCounter.class.getName())); 
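+    // Note on the memory assertions: the polling loop above requested no
+    // StatusGetOpts, so getMemoryUsedByAM()/getMemoryUsedByTasks() stayed at
+    // their 0 default; the final getDAGStatus call added GET_MEMORY_USAGE,
+    // so the values asserted below are expected to be positive.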
assertNotNull(dagStatus.getDAGCounters().findCounter(TaskCounter.GC_TIME_MILLIS)); + assertTrue("Memory used by AM is supposed to be >0", dagStatus.getMemoryUsedByAM() > 0); + assertTrue("Memory used by tasks is supposed to be >0", dagStatus.getMemoryUsedByTasks() > 0); + ExampleDriver.printDAGStatus(dagClient, new String[] { "SleepVertex" }, true, true); tezSession.stop(); } @@ -225,7 +232,7 @@ public void testSleepJob() throws TezException, IOException, InterruptedExceptio public void testNonDefaultFSStagingDir() throws Exception { SleepProcessorConfig spConf = new SleepProcessorConfig(1); - DAG dag = DAG.create("TezSleepProcessor"); + DAG dag = DAG.create("DAG-testNonDefaultFSStagingDir"); Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create( SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1, Resource.newInstance(1024, 1)); @@ -833,6 +840,7 @@ public List initialize() throws Exception { .getConfigurationBytes()); try { + Thread.currentThread().setContextClassLoader(TezClassLoader.getInstance()); ReflectionUtils.getClazz(RELOCALIZATION_TEST_CLASS_NAME); LOG.info("Class found"); FileSystem fs = FileSystem.get(conf); diff --git a/tez-tests/src/test/java/org/apache/tez/test/FaultToleranceTestRunner.java b/tez-tests/src/test/java/org/apache/tez/test/FaultToleranceTestRunner.java index 94242a7fd2..9305711744 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/FaultToleranceTestRunner.java +++ b/tez-tests/src/test/java/org/apache/tez/test/FaultToleranceTestRunner.java @@ -25,7 +25,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.tez.client.TezClientUtils; import org.apache.tez.client.TezClient; import org.apache.tez.dag.api.DAG; @@ -52,10 +51,10 @@ public class FaultToleranceTestRunner { void setup() throws Exception { TezConfiguration tezConf = null; - if (conf == null ) { - tezConf = new TezConfiguration(new YarnConfiguration()); - }else { - tezConf = new TezConfiguration(new YarnConfiguration(this.conf)); + if (conf == null) { + tezConf = new TezConfiguration(); + } else { + tezConf = new TezConfiguration(this.conf); } FileSystem defaultFs = FileSystem.get(tezConf); diff --git a/tez-tests/src/test/java/org/apache/tez/test/MiniTezCluster.java b/tez-tests/src/test/java/org/apache/tez/test/MiniTezCluster.java index c727a8fc38..9af1e604b2 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/MiniTezCluster.java +++ b/tez-tests/src/test/java/org/apache/tez/test/MiniTezCluster.java @@ -28,6 +28,7 @@ import java.util.List; import java.util.Set; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -49,6 +50,7 @@ import org.apache.hadoop.yarn.server.MiniYARNCluster; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; +import org.apache.tez.common.TezTestUtils; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.dag.api.TezUncheckedException; import org.apache.tez.dag.app.DAGAppMaster; @@ -87,6 +89,12 @@ public MiniTezCluster(String testName, int noOfNMs, super(testName, noOfNMs, numLocalDirs, numLogDirs); } + @Override + public void init(Configuration conf) { + TezTestUtils.ensureHighDiskUtilizationLimit(conf); + super.init(conf); + } + @Override public void 
serviceInit(Configuration conf) throws Exception { conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_TEZ_FRAMEWORK_NAME); @@ -135,6 +143,11 @@ public void serviceInit(Configuration conf) throws Exception { conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false); conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000"); + conf.setInt(CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 1); + conf.setInt(CommonConfigurationKeys.IPC_CLIENT_CONNECT_TIMEOUT_KEY, 1000); + conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,0); + conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, 0); + conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_TIMEOUT_KEY,1000); try { Path stagingPath = FileContext.getFileContext(conf).makeQualified( diff --git a/tez-tests/src/test/java/org/apache/tez/test/RecoveryServiceWithEventHandlingHook.java b/tez-tests/src/test/java/org/apache/tez/test/RecoveryServiceWithEventHandlingHook.java index c08780f473..186ab7e659 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/RecoveryServiceWithEventHandlingHook.java +++ b/tez-tests/src/test/java/org/apache/tez/test/RecoveryServiceWithEventHandlingHook.java @@ -24,6 +24,8 @@ import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.tez.common.ReflectionUtils; @@ -48,7 +50,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; /** * Add hook before/after processing RecoveryEvent & SummaryEvent @@ -222,15 +224,16 @@ public HistoryEvent getHistoryEvent() { private String encodeHistoryEvent(HistoryEvent event) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); - event.toProtoStream(out); + CodedOutputStream codedOutputStream = CodedOutputStream.newInstance(out); + event.toProtoStream(codedOutputStream); + codedOutputStream.flush(); return event.getClass().getName() + "," + Base64.encodeBase64String(out.toByteArray()); } private HistoryEvent decodeHistoryEvent(String eventClass, String base64) throws IOException { - ByteArrayInputStream in = new ByteArrayInputStream( - Base64.decodeBase64(base64)); + CodedInputStream in = CodedInputStream.newInstance(Base64.decodeBase64(base64)); try { HistoryEvent event = ReflectionUtils.createClazzInstance(eventClass); event.fromProtoStream(in); @@ -331,7 +334,7 @@ public boolean match(HistoryEvent incomingEvent) { TaskStartedEvent otherEvent = (TaskStartedEvent) incomingEvent; TaskStartedEvent conditionEvent = (TaskStartedEvent) event; // compare vertexId and taskId - return otherEvent.getTaskID().getVertexID().getId() == conditionEvent.getTaskID().getVertexID().getId() + return otherEvent.getVertexID().getId() == conditionEvent.getVertexID().getId() && otherEvent.getTaskID().getId() == conditionEvent.getTaskID().getId(); } break; @@ -341,7 +344,7 @@ public boolean match(HistoryEvent incomingEvent) { TaskFinishedEvent otherEvent = (TaskFinishedEvent) incomingEvent; TaskFinishedEvent conditionEvent = (TaskFinishedEvent) event; // compare vertexId and taskId - return otherEvent.getTaskID().getVertexID().getId() == conditionEvent.getTaskID().getVertexID().getId() + return otherEvent.getVertexID().getId() == conditionEvent.getVertexID().getId() && 
otherEvent.getTaskID().getId() == conditionEvent.getTaskID().getId(); } break; @@ -351,9 +354,9 @@ public boolean match(HistoryEvent incomingEvent) { TaskAttemptStartedEvent otherEvent = (TaskAttemptStartedEvent) incomingEvent; TaskAttemptStartedEvent conditionEvent = (TaskAttemptStartedEvent) event; // compare vertexId, taskId & taskAttemptId - return otherEvent.getTaskAttemptID().getTaskID().getVertexID().getId() - == conditionEvent.getTaskAttemptID().getTaskID().getVertexID().getId() - && otherEvent.getTaskAttemptID().getTaskID().getId() == conditionEvent.getTaskAttemptID().getTaskID().getId() + return otherEvent.getVertexID().getId() + == conditionEvent.getVertexID().getId() + && otherEvent.getTaskID().getId() == conditionEvent.getTaskID().getId() && otherEvent.getTaskAttemptID().getId() == conditionEvent.getTaskAttemptID().getId(); } break; @@ -363,9 +366,9 @@ public boolean match(HistoryEvent incomingEvent) { TaskAttemptFinishedEvent otherEvent = (TaskAttemptFinishedEvent) incomingEvent; TaskAttemptFinishedEvent conditionEvent = (TaskAttemptFinishedEvent) event; // compare vertexId, taskId & taskAttemptId - return otherEvent.getTaskAttemptID().getTaskID().getVertexID().getId() - == conditionEvent.getTaskAttemptID().getTaskID().getVertexID().getId() - && otherEvent.getTaskAttemptID().getTaskID().getId() == conditionEvent.getTaskAttemptID().getTaskID().getId() + return otherEvent.getVertexID().getId() + == conditionEvent.getVertexID().getId() + && otherEvent.getTaskID().getId() == conditionEvent.getTaskID().getId() && otherEvent.getTaskAttemptID().getId() == conditionEvent.getTaskAttemptID().getId(); } break; diff --git a/tez-tests/src/test/java/org/apache/tez/test/SimpleTestDAG.java b/tez-tests/src/test/java/org/apache/tez/test/SimpleTestDAG.java index 90a8cf61ad..36fad117ed 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/SimpleTestDAG.java +++ b/tez-tests/src/test/java/org/apache/tez/test/SimpleTestDAG.java @@ -38,11 +38,13 @@ * v2 * */ -public class SimpleTestDAG { +public final class SimpleTestDAG { static Resource defaultResource = Resource.newInstance(100, 0); public static String TEZ_SIMPLE_DAG_NUM_TASKS = "tez.simple-test-dag.num-tasks"; public static int TEZ_SIMPLE_DAG_NUM_TASKS_DEFAULT = 2; + + private SimpleTestDAG() {} public static DAG createDAG(String name, Configuration conf) throws Exception { @@ -76,10 +78,6 @@ public static DAG createDAG(Configuration conf) throws Exception { * v4 v5 * \ / * v6 - * @param name - * @param conf - * @return - * @throws Exception */ public static DAG createDAGForVertexOrder(String name, Configuration conf) throws Exception{ UserPayload payload = UserPayload.create(null); diff --git a/tez-tests/src/test/java/org/apache/tez/test/SimpleTestDAG3Vertices.java b/tez-tests/src/test/java/org/apache/tez/test/SimpleTestDAG3Vertices.java index a48b2d696b..5873b66978 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/SimpleTestDAG3Vertices.java +++ b/tez-tests/src/test/java/org/apache/tez/test/SimpleTestDAG3Vertices.java @@ -40,11 +40,13 @@ * v3 * */ -public class SimpleTestDAG3Vertices { +public final class SimpleTestDAG3Vertices { static Resource defaultResource = Resource.newInstance(100, 0); public static String TEZ_SIMPLE_DAG_NUM_TASKS = "tez.simple-test-dag-3-vertices.num-tasks"; public static int TEZ_SIMPLE_DAG_NUM_TASKS_DEFAULT = 2; + + private SimpleTestDAG3Vertices() {} public static DAG createDAG(String name, Configuration conf) throws Exception { diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestAM.java 
b/tez-tests/src/test/java/org/apache/tez/test/TestAM.java new file mode 100644 index 0000000000..a31fa7e8ad --- /dev/null +++ b/tez-tests/src/test/java/org/apache/tez/test/TestAM.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.test; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import java.io.File; +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configuration.IntegerRanges; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.tez.client.TezClient; +import org.apache.tez.dag.api.DAG; +import org.apache.tez.dag.api.ProcessorDescriptor; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.api.TezException; +import org.apache.tez.dag.api.Vertex; +import org.apache.tez.dag.api.client.DAGClient; +import org.apache.tez.dag.api.client.DAGStatus; +import org.apache.tez.runtime.library.processor.SleepProcessor; +import org.apache.tez.runtime.library.processor.SleepProcessor.SleepProcessorConfig; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestAM { + + private static final Logger LOG = LoggerFactory.getLogger(TestAM.class); + + private static MiniTezCluster tezCluster; + private static MiniDFSCluster dfsCluster; + + private static Configuration conf = new Configuration(); + private static FileSystem remoteFs; + + private static final String TEST_ROOT_DIR = "target" + Path.SEPARATOR + TestAM.class.getName() + "-tmpDir"; + + @BeforeClass + public static void setup() throws IOException { + try { + conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR); + dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).format(true).racks(null).build(); + remoteFs = dfsCluster.getFileSystem(); + } catch (IOException io) { + throw new RuntimeException("problem starting mini dfs cluster", io); + } + + if (tezCluster == null) { + tezCluster = new MiniTezCluster(TestAM.class.getName(), 1, 1, 1); + Configuration tezClusterConf = new Configuration(); + tezClusterConf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS + tezClusterConf.setInt("yarn.nodemanager.delete.debug-delay-sec", 20000); + tezClusterConf.setLong(TezConfiguration.TEZ_AM_SLEEP_TIME_BEFORE_EXIT_MILLIS, 2000); + tezClusterConf.set(YarnConfiguration.PROXY_ADDRESS, "localhost"); + //provide temporary profiler script to test /prof endpoint + 
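+      // The /prof endpoint shells out to profiler.sh resolved under
+      // -Dasync.profiler.home (async-profiler's launcher script), so an empty
+      // executable stub is enough here: the test only asserts the HTTP 202
+      // ("profiling started") response, not real profiler output.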
File profiler = getProfiler(); + profiler.createNewFile(); + profiler.setExecutable(true, false); + tezClusterConf.set(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, ("-Dasync.profiler.home=" + getProfilerHomePath())); + tezCluster.init(tezClusterConf); + tezCluster.start(); + } + } + + @AfterClass + public static void tearDown() { + if (tezCluster != null) { + tezCluster.stop(); + tezCluster = null; + } + if (dfsCluster != null) { + dfsCluster.shutdown(); + dfsCluster = null; + } + getProfiler().delete(); + } + + @Test(timeout = 60000) + public void testAMWebUIService() throws TezException, IOException, InterruptedException { + SleepProcessorConfig spConf = new SleepProcessorConfig(1); + + DAG dag = DAG.create("DAG-testAMWebUIService"); + Vertex vertex = Vertex.create("SleepVertex", + ProcessorDescriptor.create(SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 1, + Resource.newInstance(1024, 1)); + dag.addVertex(vertex); + + TezConfiguration tezConf = new TezConfiguration(tezCluster.getConfig()); + TezClient tezSession = TezClient.create("TezSleepProcessor", tezConf, false); + tezSession.start(); + + DAGClient dagClient = tezSession.submitDAG(dag); + + DAGStatus dagStatus = dagClient.getDAGStatus(null); + while (!dagStatus.isCompleted()) { + Thread.sleep(500L); + dagStatus = dagClient.getDAGStatus(null); + } + + String webUIAddress = dagClient.getWebUIAddress(); + assertNotNull("getWebUIAddress should return TezAM's web UI address", webUIAddress); + LOG.info("TezAM webUI address: " + webUIAddress); + + checkAddress(webUIAddress + "/jmx"); + checkAddress(webUIAddress + "/conf"); + checkAddress(webUIAddress + "/stacks"); + checkAddress(webUIAddress + "/prof", 202); + checkAddress(webUIAddress + "/prof-output"); + + URL url = new URL(webUIAddress); + IntegerRanges portRange = conf.getRange(TezConfiguration.TEZ_AM_WEBSERVICE_PORT_RANGE, + TezConfiguration.TEZ_AM_WEBSERVICE_PORT_RANGE_DEFAULT); + assertTrue("WebUIService port should be in the defined range (got: " + url.getPort() + ")", + portRange.getRangeStart() <= url.getPort()); + + tezSession.stop(); + } + + private void checkAddress(String url) { + checkAddress(url, 200); + } + + private void checkAddress(String url, int expectedCode) { + boolean success = false; + try { + HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection(); + connection.connect(); + success = (connection.getResponseCode() == expectedCode); + } catch (Exception e) { + LOG.error("Error while checking url: " + url, e); + } + assertTrue(url + " should be available", success); + } + + private static File getProfiler() { + return new File(getProfilerHomePath(), "profiler.sh"); + } + + private static String getProfilerHomePath() { + return System.getProperty("java.io.tmpdir"); + } +} diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestAMRecovery.java b/tez-tests/src/test/java/org/apache/tez/test/TestAMRecovery.java index f00ae5cb11..7fe3b3acc1 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestAMRecovery.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestAMRecovery.java @@ -26,6 +26,7 @@ import java.util.List; import java.util.Random; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -175,6 +176,9 @@ public void setup() throws Exception { tezConf.setBoolean( RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true); + 
tezConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,0); + tezConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, 0); + tezConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_TIMEOUT_KEY,1000); tezSession = TezClient.create("TestDAGRecovery", tezConf); tezSession.start(); } @@ -459,9 +463,9 @@ private List findTaskAttemptFinishedEvent( if (historyEvent.getEventType() == HistoryEventType.TASK_ATTEMPT_FINISHED) { TaskAttemptFinishedEvent taFinishedEvent = (TaskAttemptFinishedEvent) historyEvent; - if (taFinishedEvent.getTaskAttemptID().getTaskID().getVertexID() + if (taFinishedEvent.getVertexID() .getId() == vertexId - && taFinishedEvent.getTaskAttemptID().getTaskID().getId() == taskId) { + && taFinishedEvent.getTaskID().getId() == taskId) { resultEvents.add(taFinishedEvent); } } diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestAMRecoveryAggregationBroadcast.java b/tez-tests/src/test/java/org/apache/tez/test/TestAMRecoveryAggregationBroadcast.java new file mode 100644 index 0000000000..db1083916f --- /dev/null +++ b/tez-tests/src/test/java/org/apache/tez/test/TestAMRecoveryAggregationBroadcast.java @@ -0,0 +1,509 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tez.test; + +import static org.junit.Assert.assertEquals; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.List; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.client.api.YarnClient; +import org.apache.tez.client.TezClient; +import org.apache.tez.client.TezClientUtils; +import org.apache.tez.common.Preconditions; +import org.apache.tez.common.TezCommonUtils; +import org.apache.tez.common.TezUtils; +import org.apache.tez.common.counters.DAGCounter; +import org.apache.tez.common.counters.TezCounters; +import org.apache.tez.dag.api.DAG; +import org.apache.tez.dag.api.DataSinkDescriptor; +import org.apache.tez.dag.api.DataSourceDescriptor; +import org.apache.tez.dag.api.Edge; +import org.apache.tez.dag.api.EdgeProperty; +import org.apache.tez.dag.api.ProcessorDescriptor; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.api.TezConstants; +import org.apache.tez.dag.api.UserPayload; +import org.apache.tez.dag.api.Vertex; +import org.apache.tez.dag.api.client.DAGClient; +import org.apache.tez.dag.api.client.DAGStatus; +import org.apache.tez.dag.api.client.DAGStatus.State; +import org.apache.tez.dag.api.client.StatusGetOpts; +import org.apache.tez.dag.api.oldrecords.TaskAttemptState; +import org.apache.tez.dag.app.RecoveryParser; +import org.apache.tez.dag.history.HistoryEvent; +import org.apache.tez.dag.history.HistoryEventType; +import org.apache.tez.dag.history.events.TaskAttemptFinishedEvent; +import org.apache.tez.dag.history.recovery.RecoveryService; +import org.apache.tez.mapreduce.input.MRInput; +import org.apache.tez.mapreduce.output.MROutput; +import org.apache.tez.mapreduce.processor.SimpleMRProcessor; +import org.apache.tez.runtime.api.ProcessorContext; +import org.apache.tez.runtime.library.api.KeyValueReader; +import org.apache.tez.runtime.library.api.KeyValueWriter; +import org.apache.tez.runtime.library.api.KeyValuesReader; +import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; +import org.apache.tez.runtime.library.conf.UnorderedKVEdgeConfig; +import org.apache.tez.runtime.library.partitioner.HashPartitioner; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestAMRecoveryAggregationBroadcast { + private static final Logger LOG = LoggerFactory.getLogger(TestAMRecoveryAggregationBroadcast.class); + private static final String INPUT1 = "Input"; + private static final String INPUT2 = "Input"; + private 
static final String OUTPUT = "Output"; + private static final String TABLE_SCAN = "TableScan"; + private static final String AGGREGATION = "Aggregation"; + private static final String MAP_JOIN = "MapJoin"; + private static final String TEST_ROOT_DIR = "target" + Path.SEPARATOR + + TestAMRecoveryAggregationBroadcast.class.getName() + "-tmpDir"; + private static final Path INPUT_FILE = new Path(TEST_ROOT_DIR, "input.csv"); + private static final Path OUT_PATH = new Path(TEST_ROOT_DIR, "out-groups"); + private static final String EXPECTED_OUTPUT = "1-5\n1-5\n1-5\n1-5\n1-5\n" + + "2-4\n2-4\n2-4\n2-4\n" + "3-3\n3-3\n3-3\n" + "4-2\n4-2\n" + "5-1\n"; + private static final String TABLE_SCAN_SLEEP = "tez.test.table.scan.sleep"; + private static final String AGGREGATION_SLEEP = "tez.test.aggregation.sleep"; + private static final String MAP_JOIN_SLEEP = "tez.test.map.join.sleep"; + + private static Configuration dfsConf; + private static MiniDFSCluster dfsCluster; + private static MiniTezCluster tezCluster; + private static FileSystem remoteFs; + + private TezConfiguration tezConf; + private TezClient tezSession; + + @BeforeClass + public static void setupAll() { + try { + dfsConf = new Configuration(); + dfsConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR); + dfsCluster = new MiniDFSCluster.Builder(dfsConf).numDataNodes(3).format(true).racks(null).build(); + remoteFs = dfsCluster.getFileSystem(); + createSampleFile(); + } catch (IOException io) { + throw new RuntimeException("problem starting mini dfs cluster", io); + } + + if (tezCluster == null) { + tezCluster = new MiniTezCluster(TestAMRecoveryAggregationBroadcast.class.getName(), 1, 1, 1); + Configuration conf = new Configuration(dfsConf); + conf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS + conf.setInt("yarn.nodemanager.delete.debug-delay-sec", 20000); + conf.setLong(TezConfiguration.TEZ_AM_SLEEP_TIME_BEFORE_EXIT_MILLIS, 500); + tezCluster.init(conf); + tezCluster.start(); + } + } + + private static void createSampleFile() throws IOException { + FSDataOutputStream out = remoteFs.create(INPUT_FILE); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); + // 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5 + for (int i = 1; i <= 5; i++) { + for (int j = 0; j <= 5 - i; j++) { + writer.write(String.valueOf(i)); + writer.newLine(); + } + } + writer.close(); + } + + @AfterClass + public static void tearDownAll() { + if (tezCluster != null) { + tezCluster.stop(); + tezCluster = null; + } + if (dfsCluster != null) { + dfsCluster.shutdown(true); + dfsCluster = null; + } + } + + @Before + public void setup() throws Exception { + Path remoteStagingDir = remoteFs.makeQualified(new Path(TEST_ROOT_DIR, String + .valueOf(new Random().nextInt(100000)))); + TezClientUtils.ensureStagingDirExists(dfsConf, remoteStagingDir); + + tezConf = new TezConfiguration(tezCluster.getConfig()); + tezConf.setInt(TezConfiguration.DAG_RECOVERY_MAX_UNFLUSHED_EVENTS, 0); + tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "INFO"); + tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString()); + tezConf.setInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, 500); + tezConf.set(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, " -Xmx256m"); + tezConf.setBoolean(TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, false); + tezConf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true); + + tezSession = TezClient.create("TestAMRecoveryAggregationBroadcast", tezConf); + tezSession.start(); + } + + @After + 
public void teardown() throws InterruptedException { + if (tezSession != null) { + try { + LOG.info("Stopping Tez Session"); + tezSession.stop(); + } catch (Exception e) { + LOG.error("Failed to stop Tez session", e); + } + } + tezSession = null; + } + + @Test(timeout = 120000) + public void testSucceed() throws Exception { + DAG dag = createDAG("Succeed"); + TezCounters counters = runDAGAndVerify(dag, false); + assertEquals(3, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue()); + + List historyEvents1 = readRecoveryLog(1); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size()); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 1, 0).size()); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 2, 0).size()); + + // No retry happens + assertEquals(Collections.emptyList(), readRecoveryLog(2)); + } + + @Test(timeout = 120000) + public void testTableScanTemporalFailure() throws Exception { + tezConf.setBoolean(TABLE_SCAN_SLEEP, true); + DAG dag = createDAG("TableScanTemporalFailure"); + TezCounters counters = runDAGAndVerify(dag, true); + assertEquals(3, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue()); + + List historyEvents1 = readRecoveryLog(1); + assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size()); + assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 1, 0).size()); + assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 2, 0).size()); + + List historyEvents2 = readRecoveryLog(2); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size()); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 1, 0).size()); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 2, 0).size()); + + assertEquals(Collections.emptyList(), readRecoveryLog(3)); + } + + @Test(timeout = 120000) + public void testAggregationTemporalFailure() throws Exception { + tezConf.setBoolean(AGGREGATION_SLEEP, true); + DAG dag = createDAG("AggregationTemporalFailure"); + TezCounters counters = runDAGAndVerify(dag, true); + assertEquals(3, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue()); + + List historyEvents1 = readRecoveryLog(1); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size()); + assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 1, 0).size()); + assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 2, 0).size()); + + List historyEvents2 = readRecoveryLog(2); + assertEquals(0, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size()); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 1, 0).size()); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 2, 0).size()); + + assertEquals(Collections.emptyList(), readRecoveryLog(3)); + } + + @Test(timeout = 120000) + public void testMapJoinTemporalFailure() throws Exception { + tezConf.setBoolean(MAP_JOIN_SLEEP, true); + DAG dag = createDAG("MapJoinTemporalFailure"); + TezCounters counters = runDAGAndVerify(dag, true); + assertEquals(3, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue()); + + List historyEvents1 = readRecoveryLog(1); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size()); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 1, 0).size()); + assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 2, 0).size()); + + List historyEvents2 = readRecoveryLog(2); + assertEquals(0, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size()); + assertEquals(0, 
findTaskAttemptFinishedEvent(historyEvents2, 1, 0).size()); + assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 2, 0).size()); + + assertEquals(Collections.emptyList(), readRecoveryLog(3)); + } + + /** + * v1 scans lines and emit (line -> 1), imitating a simple Map vertex. + * v2 aggregates the lines and emit (line -> # of duplicated values), imitating an aggregation. + * v3 joins the output of v2 with another input. v2 broadcasts its output. + * (input1) + * \ + * v1 + * \ + * v2 (input2) + * \ / + * v3 + */ + private DAG createDAG(String dagName) throws Exception { + UserPayload payload = TezUtils.createUserPayloadFromConf(tezConf); + DataSourceDescriptor dataSource = MRInput + .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, + INPUT_FILE.toString()) + .build(); + // each line -> 1 + Vertex tableScanVertex = Vertex + .create(TABLE_SCAN, ProcessorDescriptor.create(TableScanProcessor.class.getName()) + .setUserPayload(payload)) + .addDataSource(INPUT1, dataSource); + + // key -> num keys + Vertex aggregationVertex = Vertex + .create(AGGREGATION, ProcessorDescriptor + .create(AggregationProcessor.class.getName()).setUserPayload(payload), 1); + + DataSinkDescriptor dataSink = MROutput + .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, + OUT_PATH.toString()) + .build(); + // Broadcast Hash Join + Vertex mapJoinVertex = Vertex + .create(MAP_JOIN, ProcessorDescriptor.create(MapJoinProcessor.class.getName()) + .setUserPayload(payload)) + .addDataSource(INPUT2, dataSource) + .addDataSink(OUTPUT, dataSink); + + EdgeProperty orderedEdge = OrderedPartitionedKVEdgeConfig + .newBuilder(Text.class.getName(), IntWritable.class.getName(), HashPartitioner.class.getName()) + .setFromConfiguration(tezConf) + .build() + .createDefaultEdgeProperty(); + EdgeProperty broadcastEdge = UnorderedKVEdgeConfig + .newBuilder(Text.class.getName(), IntWritable.class.getName()) + .setFromConfiguration(tezConf) + .build() + .createDefaultBroadcastEdgeProperty(); + + DAG dag = DAG.create("TestAMRecoveryAggregationBroadcast_" + dagName); + dag.addVertex(tableScanVertex) + .addVertex(aggregationVertex) + .addVertex(mapJoinVertex) + .addEdge(Edge.create(tableScanVertex, aggregationVertex, orderedEdge)) + .addEdge(Edge.create(aggregationVertex, mapJoinVertex, broadcastEdge)); + return dag; + } + + TezCounters runDAGAndVerify(DAG dag, boolean killAM) throws Exception { + tezSession.waitTillReady(); + DAGClient dagClient = tezSession.submitDAG(dag); + + if (killAM) { + TimeUnit.SECONDS.sleep(10); + YarnClient yarnClient = YarnClient.createYarnClient(); + yarnClient.init(tezConf); + yarnClient.start(); + ApplicationAttemptId id = ApplicationAttemptId.newInstance(tezSession.getAppMasterApplicationId(), 1); + yarnClient.failApplicationAttempt(id); + yarnClient.close(); + } + DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(EnumSet.of(StatusGetOpts.GET_COUNTERS)); + LOG.info("Diagnosis: " + dagStatus.getDiagnostics()); + Assert.assertEquals(State.SUCCEEDED, dagStatus.getState()); + + FSDataInputStream in = remoteFs.open(new Path(OUT_PATH, "part-v002-o000-r-00000")); + ByteBuffer buf = ByteBuffer.allocate(100); + in.read(buf); + buf.flip(); + Assert.assertEquals(EXPECTED_OUTPUT, StandardCharsets.UTF_8.decode(buf).toString()); + return dagStatus.getDAGCounters(); + } + + private List readRecoveryLog(int attemptNum) throws IOException { + ApplicationId appId = tezSession.getAppMasterApplicationId(); + Path tezSystemStagingDir = 
TezCommonUtils.getTezSystemStagingPath(tezConf, appId.toString()); + Path recoveryDataDir = TezCommonUtils.getRecoveryPath(tezSystemStagingDir, tezConf); + FileSystem fs = tezSystemStagingDir.getFileSystem(tezConf); + List<HistoryEvent> historyEvents = new ArrayList<>(); + Path currentAttemptRecoveryDataDir = TezCommonUtils.getAttemptRecoveryPath(recoveryDataDir, attemptNum); + Path recoveryFilePath = + new Path(currentAttemptRecoveryDataDir, appId.toString().replace("application", "dag") + + "_1" + TezConstants.DAG_RECOVERY_RECOVER_FILE_SUFFIX); + if (fs.exists(recoveryFilePath)) { + LOG.info("Read recovery file:" + recoveryFilePath); + historyEvents.addAll(RecoveryParser.parseDAGRecoveryFile(fs.open(recoveryFilePath))); + } + printHistoryEvents(historyEvents, attemptNum); + return historyEvents; + } + + private void printHistoryEvents(List<HistoryEvent> historyEvents, int attemptId) { + LOG.info("RecoveryLogs from attempt:" + attemptId); + for(HistoryEvent historyEvent : historyEvents) { + LOG.info("Parsed event from recovery stream" + + ", eventType=" + historyEvent.getEventType() + + ", event=" + historyEvent); + } + LOG.info(""); + } + + private List<TaskAttemptFinishedEvent> findTaskAttemptFinishedEvent( + List<HistoryEvent> historyEvents, int vertexId, int taskId) { + List<TaskAttemptFinishedEvent> resultEvents = new ArrayList<>(); + for (HistoryEvent historyEvent : historyEvents) { + if (historyEvent.getEventType() == HistoryEventType.TASK_ATTEMPT_FINISHED) { + TaskAttemptFinishedEvent taFinishedEvent = (TaskAttemptFinishedEvent) historyEvent; + if (taFinishedEvent.getState() == TaskAttemptState.KILLED) { + continue; + } + if (taFinishedEvent.getVertexID().getId() == vertexId && taFinishedEvent.getTaskID().getId() == taskId) { + resultEvents.add(taFinishedEvent); + } + } + } + return resultEvents; + } + + public static class TableScanProcessor extends SimpleMRProcessor { + private static final IntWritable one = new IntWritable(1); + + private final boolean sleep; + + public TableScanProcessor(ProcessorContext context) { + super(context); + try { + Configuration conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); + sleep = conf.getBoolean(TABLE_SCAN_SLEEP, false); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void run() throws Exception { + if (getContext().getDAGAttemptNumber() == 1 && sleep) { + TimeUnit.SECONDS.sleep(60); + } + Preconditions.checkArgument(getInputs().size() == 1); + Preconditions.checkArgument(getOutputs().size() == 1); + KeyValueReader kvReader = (KeyValueReader) getInputs().get(INPUT1).getReader(); + KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(AGGREGATION).getWriter(); + while (kvReader.next()) { + Text line = (Text) kvReader.getCurrentValue(); + kvWriter.write(line, one); + } + } + } + + public static class AggregationProcessor extends SimpleMRProcessor { + private final boolean sleep; + + public AggregationProcessor(ProcessorContext context) { + super(context); + try { + Configuration conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); + sleep = conf.getBoolean(AGGREGATION_SLEEP, false); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void run() throws Exception { + if (getContext().getDAGAttemptNumber() == 1 && sleep) { + TimeUnit.SECONDS.sleep(60); + } + + Preconditions.checkArgument(getInputs().size() == 1); + Preconditions.checkArgument(getOutputs().size() == 1); + KeyValuesReader kvReader = (KeyValuesReader) getInputs().get(TABLE_SCAN).getReader(); + KeyValueWriter kvWriter = (KeyValueWriter)
getOutputs().get(MAP_JOIN).getWriter(); + while (kvReader.next()) { + Text word = (Text) kvReader.getCurrentKey(); + int sum = 0; + for (Object value : kvReader.getCurrentValues()) { + sum += ((IntWritable) value).get(); + } + kvWriter.write(word, new IntWritable(sum)); + } + } + } + + public static class MapJoinProcessor extends SimpleMRProcessor { + private final boolean sleep; + + public MapJoinProcessor(ProcessorContext context) { + super(context); + try { + Configuration conf = TezUtils.createConfFromUserPayload(getContext().getUserPayload()); + sleep = conf.getBoolean(MAP_JOIN_SLEEP, false); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void run() throws Exception { + if (getContext().getDAGAttemptNumber() == 1 && sleep) { + TimeUnit.SECONDS.sleep(60); + } + + Preconditions.checkArgument(getInputs().size() == 2); + Preconditions.checkArgument(getOutputs().size() == 1); + KeyValueReader broadcastKvReader = (KeyValueReader) getInputs().get(AGGREGATION).getReader(); + HashMap<String, Integer> countMap = new HashMap<>(); + while (broadcastKvReader.next()) { + String key = broadcastKvReader.getCurrentKey().toString(); + int value = ((IntWritable) broadcastKvReader.getCurrentValue()).get(); + countMap.put(key, value); + } + + KeyValueReader kvReader = (KeyValueReader) getInputs().get(INPUT2).getReader(); + KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(OUTPUT).getWriter(); + while (kvReader.next()) { + String line = kvReader.getCurrentValue().toString(); + int count = countMap.getOrDefault(line, 0); + kvWriter.write(NullWritable.get(), String.format("%s-%d", line, count)); + } + } + } +} diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestDAGRecovery.java b/tez-tests/src/test/java/org/apache/tez/test/TestDAGRecovery.java index b0c9ccc40a..cf4744b2a2 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestDAGRecovery.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestDAGRecovery.java @@ -18,6 +18,9 @@ package org.apache.tez.test; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.log4j.Level; +import org.apache.log4j.LogManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -133,6 +136,9 @@ public void setup() throws Exception { tezConf.set(TezConfiguration.TEZ_AM_LAUNCH_CMD_OPTS, " -Xmx256m"); tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true); tezConf.set(TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, "false"); + tezConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,0); + tezConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, 0); + tezConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_TIMEOUT_KEY,1000); tezSession = TezClient.create("TestDAGRecovery", tezConf); tezSession.start(); @@ -154,7 +160,7 @@ public void teardown() throws InterruptedException { void runDAGAndVerify(DAG dag, DAGStatus.State finalState) throws Exception { tezSession.waitTillReady(); DAGClient dagClient = tezSession.submitDAG(dag); - DAGStatus dagStatus = dagClient.getDAGStatus(null); + DAGStatus dagStatus = dagClient.getDAGStatus(null, 10); while (!dagStatus.isCompleted()) { LOG.info("Waiting for dag to complete. Sleeping for 500ms."
+ " DAG name: " + dag.getName() diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestDriver.java b/tez-tests/src/test/java/org/apache/tez/test/TestDriver.java index bf04fd5369..6da14dfee1 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestDriver.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestDriver.java @@ -20,9 +20,11 @@ import org.apache.hadoop.util.ProgramDriver; -public class TestDriver { +public final class TestDriver { - public static void main(String argv[]){ + private TestDriver() {} + + public static void main(String[] argv){ int exitCode = -1; ProgramDriver pgd = new ProgramDriver(); try { diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java b/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java index 404e324dd6..f66bc93f7f 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestExceptionPropagation.java @@ -116,7 +116,7 @@ private void startMiniTezCluster() { try { conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR); dfsCluster = - new MiniDFSCluster.Builder(conf).numDataNodes(3).format(true) + new MiniDFSCluster.Builder(conf).numDataNodes(1).storagesPerDatanode(1).format(true) .racks(null).build(); remoteFs = dfsCluster.getFileSystem(); } catch (IOException io) { @@ -295,7 +295,6 @@ public void testExceptionPropagationNonSession() throws Exception { appReport.getDiagnostics().trim()); } finally { stopNonSessionClient(); - Thread.sleep(10*1000); stopTezMiniCluster(); } } @@ -630,7 +629,6 @@ public void run(Map inputs, output.start(); output.getWriter(); - Thread.sleep(3*1000); if (this.exLocation == ExceptionLocation.PROCESSOR_RUN_ERROR) { throw new Error(this.exLocation.name()); } else if (this.exLocation == ExceptionLocation.PROCESSOR_RUN_EXCEPTION) { diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java b/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java index e2fc53f69a..bd70746508 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestFaultTolerance.java @@ -30,6 +30,7 @@ import org.apache.tez.runtime.library.cartesianproduct.CartesianProductConfig; import org.apache.tez.runtime.library.cartesianproduct.CartesianProductEdgeManager; import org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager; +import org.apache.tez.test.dag.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,11 +52,6 @@ import org.apache.tez.dag.api.Vertex; import org.apache.tez.dag.api.client.DAGClient; import org.apache.tez.dag.api.client.DAGStatus; -import org.apache.tez.test.dag.SimpleReverseVTestDAG; -import org.apache.tez.test.dag.SimpleVTestDAG; -import org.apache.tez.test.dag.SixLevelsFailingDAG; -import org.apache.tez.test.dag.ThreeLevelsFailingDAG; -import org.apache.tez.test.dag.TwoLevelsFailingDAG; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Ignore; @@ -411,21 +407,24 @@ public void testMultiVersionInputFailureWithoutExit() throws Exception { @Test (timeout=60000) public void testTwoLevelsFailingDAGSuccess() throws Exception { Configuration testConf = new Configuration(); - DAG dag = TwoLevelsFailingDAG.createDAG("testTwoLevelsFailingDAGSuccess", testConf); + DAG dag = new FailingDagBuilder(FailingDagBuilder.Levels.TWO) + .withName("testTwoLevelsFailingDAGSuccess").withConf(testConf).build(); runDAGAndVerify(dag, 
DAGStatus.State.SUCCEEDED); } @Test (timeout=60000) public void testThreeLevelsFailingDAGSuccess() throws Exception { Configuration testConf = new Configuration(); - DAG dag = ThreeLevelsFailingDAG.createDAG("testThreeLevelsFailingDAGSuccess", testConf); + DAG dag = new FailingDagBuilder(FailingDagBuilder.Levels.THREE) + .withName("testThreeLevelsFailingDAGSuccess").withConf(testConf).build(); runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED); } @Test (timeout=60000) public void testSixLevelsFailingDAGSuccess() throws Exception { Configuration testConf = new Configuration(); - DAG dag = SixLevelsFailingDAG.createDAG("testSixLevelsFailingDAGSuccess", testConf); + DAG dag = new FailingDagBuilder(FailingDagBuilder.Levels.SIX) + .withName("testSixLevelsFailingDAGSuccess").withConf(testConf).build(); runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED); } @@ -462,8 +461,9 @@ public void testThreeLevelsFailingDAG2VerticesHaveFailedAttemptsDAGSucceeds() th TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_TASK_INDEX, "l3v1"), "0"); testConf.setInt(TestProcessor.getVertexConfName( TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_VALUE, "l3v1", 0), 15); - - DAG dag = ThreeLevelsFailingDAG.createDAG("testThreeLevelsFailingDAG2VerticesHaveFailedAttemptsDAGSucceeds", testConf); + + DAG dag = new FailingDagBuilder(FailingDagBuilder.Levels.THREE) + .withName("testThreeLevelsFailingDAG2VerticesHaveFailedAttemptsDAGSucceeds").withConf(testConf).build(); runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED); } @@ -801,7 +801,8 @@ public void testRandomFailingTasks() throws Exception { Configuration testConf = new Configuration(false); testConf.setBoolean(TestProcessor.TEZ_FAILING_PROCESSOR_DO_RANDOM_FAIL, true); testConf.setFloat(TestProcessor.TEZ_FAILING_PROCESSOR_RANDOM_FAIL_PROBABILITY, 0.5f); - DAG dag = SixLevelsFailingDAG.createDAG("testRandomFailingTasks", testConf); + DAG dag = new FailingDagBuilder(FailingDagBuilder.Levels.SIX) + .withName("testRandomFailingTasks").withConf(testConf).build(); runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED); } @@ -811,7 +812,8 @@ public void testRandomFailingInputs() throws Exception { Configuration testConf = new Configuration(false); testConf.setBoolean(TestInput.TEZ_FAILING_INPUT_DO_RANDOM_FAIL, true); testConf.setFloat(TestInput.TEZ_FAILING_INPUT_RANDOM_FAIL_PROBABILITY, 0.5f); - DAG dag = SixLevelsFailingDAG.createDAG("testRandomFailingInputs", testConf); + DAG dag = new FailingDagBuilder(FailingDagBuilder.Levels.SIX) + .withName("testRandomFailingInputs").withConf(testConf).build(); runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED); } @@ -841,7 +843,7 @@ public void testNoProgress() throws Exception { public void testCartesianProduct() throws Exception { Configuration dagConf = new Configuration(); dagConf.setDouble(TezConfiguration.TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES_FRACTION, 0.25); - DAG dag = DAG.create("dag"); + DAG dag = DAG.create("DAG-testCartesianProduct"); Configuration vertexConf = new Configuration(); vertexConf.setInt(TestProcessor.getVertexConfName( diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestInput.java b/tez-tests/src/test/java/org/apache/tez/test/TestInput.java index 811ca3cc17..7ad50b7e11 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestInput.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestInput.java @@ -175,7 +175,7 @@ public int doRead() { for (int i=0; i inputEvents) throws Exception { if (event instanceof DataMovementEvent) { DataMovementEvent dmEvent = (DataMovementEvent) event; numCompletedInputs++; - 
LOG.info(getContext().getSourceVertexName() + " Received DataMovement event sourceId : " + dmEvent.getSourceIndex() + + LOG.info(getContext().getInputOutputVertexNames() + + " Received DataMovement event sourceId : " + dmEvent.getSourceIndex() + " targetId: " + dmEvent.getTargetIndex() + " version: " + dmEvent.getVersion() + " numInputs: " + getNumPhysicalInputs() + @@ -391,7 +393,7 @@ public void handleEvents(List<Event> inputEvents) throws Exception { @Override public List<Event> close() throws Exception { - getContext().getCounters().findCounter(COUNTER_NAME, COUNTER_NAME).increment(1);; + getContext().getCounters().findCounter(COUNTER_NAME, COUNTER_NAME).increment(1); return null; } diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestLocalMode.java b/tez-tests/src/test/java/org/apache/tez/test/TestLocalMode.java index 2a5b65fa2c..035d39b13e 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestLocalMode.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestLocalMode.java @@ -20,12 +20,16 @@ import java.io.File; import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Map; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.tez.client.TezClient; import org.apache.tez.dag.api.DAG; import org.apache.tez.dag.api.ProcessorDescriptor; @@ -35,6 +39,7 @@ import org.apache.tez.dag.api.Vertex; import org.apache.tez.dag.api.client.DAGClient; import org.apache.tez.dag.api.client.DAGStatus; +import org.apache.tez.dag.api.client.VertexStatus; import org.apache.tez.examples.OrderedWordCount; import org.apache.tez.runtime.api.AbstractLogicalIOProcessor; import org.apache.tez.runtime.api.Event; @@ -43,46 +48,111 @@ import org.apache.tez.runtime.api.ProcessorContext; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.processor.SleepProcessor; +import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import static org.junit.Assert.*; +/** + * Tests for running Tez in local execution mode (without YARN). + */ +@RunWith(Parameterized.class) public class TestLocalMode { - private static final File TEST_DIR = new File( - System.getProperty("test.build.data", - System.getProperty("java.io.tmpdir")), "TestLocalMode-tez-localmode"); + /** + * In order to be able to safely get VertexStatus from a running DAG, + * the DAG needs to run for a certain amount of time, see TEZ-4475 for details.
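+ * The constant below is wired into SleepProcessorConfig in createSimpleDAG, so every + * test DAG stays alive long enough for the getVertexStatus assertions to reach a + * still-running AM.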
+ */ + private static final int SLEEP_PROCESSOR_TIME_TO_SLEEP_MS = 500; + + private static final File STAGING_DIR = new File(System.getProperty("test.build.data"), + TestLocalMode.class.getName()); + + private static MiniDFSCluster dfsCluster; + private static FileSystem remoteFs; + + private final boolean useDfs; + private final boolean useLocalModeWithoutNetwork; + + @Parameterized.Parameters(name = "useDFS:{0} useLocalModeWithoutNetwork:{1}") + public static Collection<Object[]> params() { + return Arrays.asList(new Object[][]{{false, false}, {true, false}, {false, true}, {true, true}}); + } + + public TestLocalMode(boolean useDfs, boolean useLocalModeWithoutNetwork) { + this.useDfs = useDfs; + this.useLocalModeWithoutNetwork = useLocalModeWithoutNetwork; + } + + @BeforeClass + public static void beforeClass() throws Exception { + try { + Configuration conf = new Configuration(); + dfsCluster = + new MiniDFSCluster.Builder(conf).numDataNodes(3).format(true) + .racks(null).build(); + remoteFs = dfsCluster.getFileSystem(); + } catch (IOException io) { + throw new RuntimeException("problem starting mini dfs cluster", io); + } + } + + @AfterClass + public static void afterClass() throws InterruptedException { + if (dfsCluster != null) { + try { + dfsCluster.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + + private TezConfiguration createConf() { + TezConfiguration conf = new TezConfiguration(); + conf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); + conf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE_WITHOUT_NETWORK, useLocalModeWithoutNetwork); + + if (useDfs) { + conf.set("fs.defaultFS", remoteFs.getUri().toString()); + } else { + conf.set("fs.defaultFS", "file:///"); + } + conf.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath()); + conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true); + return conf; + } @Test(timeout = 30000) public void testMultipleClientsWithSession() throws TezException, InterruptedException, IOException { - TezConfiguration tezConf1 = new TezConfiguration(); - tezConf1.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); - tezConf1.set("fs.defaultFS", "file:///"); - tezConf1.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true); + TezConfiguration tezConf1 = createConf(); TezClient tezClient1 = TezClient.create("commonName", tezConf1, true); tezClient1.start(); - DAG dag1 = createSimpleDAG("dag1", SleepProcessor.class.getName()); + DAG dag1 = createSimpleDAG("testMultipleClientsWithSession", SleepProcessor.class.getName()); DAGClient dagClient1 = tezClient1.submitDAG(dag1); dagClient1.waitForCompletion(); assertEquals(DAGStatus.State.SUCCEEDED, dagClient1.getDAGStatus(null).getState()); + assertEquals(VertexStatus.State.SUCCEEDED, + dagClient1.getVertexStatus(SleepProcessor.SLEEP_VERTEX_NAME, null).getState()); dagClient1.close(); tezClient1.stop(); - - TezConfiguration tezConf2 = new TezConfiguration(); - tezConf2.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); - tezConf2.set("fs.defaultFS", "file:///"); - tezConf2.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true); - DAG dag2 = createSimpleDAG("dag2", SleepProcessor.class.getName()); + TezConfiguration tezConf2 = createConf(); + DAG dag2 = createSimpleDAG("testMultipleClientsWithSession_2", SleepProcessor.class.getName()); TezClient tezClient2 = TezClient.create("commonName", tezConf2, true); tezClient2.start(); DAGClient dagClient2 = tezClient2.submitDAG(dag2); dagClient2.waitForCompletion();
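+ // Both clients are created under the same name "commonName" on purpose; the + // assertFalse on getExecutionContext() below verifies the two sessions stayed isolated.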
assertEquals(DAGStatus.State.SUCCEEDED, dagClient2.getDAGStatus(null).getState()); + assertEquals(VertexStatus.State.SUCCEEDED, + dagClient2.getVertexStatus(SleepProcessor.SLEEP_VERTEX_NAME, null).getState()); assertFalse(dagClient1.getExecutionContext().equals(dagClient2.getExecutionContext())); dagClient2.close(); tezClient2.stop(); @@ -91,33 +161,30 @@ public void testMultipleClientsWithSession() throws TezException, InterruptedExc @Test(timeout = 10000) public void testMultipleClientsWithoutSession() throws TezException, InterruptedException, IOException { - TezConfiguration tezConf1 = new TezConfiguration(); - tezConf1.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); - tezConf1.set("fs.defaultFS", "file:///"); - tezConf1.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true); + TezConfiguration tezConf1 = createConf(); TezClient tezClient1 = TezClient.create("commonName", tezConf1, false); tezClient1.start(); - DAG dag1 = createSimpleDAG("dag1", SleepProcessor.class.getName()); + DAG dag1 = createSimpleDAG("testMultipleClientsWithoutSession", SleepProcessor.class.getName()); DAGClient dagClient1 = tezClient1.submitDAG(dag1); dagClient1.waitForCompletion(); assertEquals(DAGStatus.State.SUCCEEDED, dagClient1.getDAGStatus(null).getState()); - + assertEquals(VertexStatus.State.SUCCEEDED, + dagClient1.getVertexStatus(SleepProcessor.SLEEP_VERTEX_NAME, null).getState()); dagClient1.close(); tezClient1.stop(); - TezConfiguration tezConf2 = new TezConfiguration(); - tezConf2.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); - tezConf2.set("fs.defaultFS", "file:///"); - tezConf2.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true); - DAG dag2 = createSimpleDAG("dag2", SleepProcessor.class.getName()); + TezConfiguration tezConf2 = createConf(); + DAG dag2 = createSimpleDAG("testMultipleClientsWithoutSession_2", SleepProcessor.class.getName()); TezClient tezClient2 = TezClient.create("commonName", tezConf2, false); tezClient2.start(); DAGClient dagClient2 = tezClient2.submitDAG(dag2); dagClient2.waitForCompletion(); assertEquals(DAGStatus.State.SUCCEEDED, dagClient2.getDAGStatus(null).getState()); + assertEquals(VertexStatus.State.SUCCEEDED, + dagClient2.getVertexStatus(SleepProcessor.SLEEP_VERTEX_NAME, null).getState()); assertFalse(dagClient1.getExecutionContext().equals(dagClient2.getExecutionContext())); dagClient2.close(); tezClient2.stop(); @@ -126,20 +193,18 @@ public void testMultipleClientsWithoutSession() throws TezException, Interrupted @Test(timeout = 20000) public void testNoSysExitOnSuccessfulDAG() throws TezException, InterruptedException, IOException { - TezConfiguration tezConf1 = new TezConfiguration(); - tezConf1.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); - tezConf1.set("fs.defaultFS", "file:///"); - tezConf1.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true); + TezConfiguration tezConf1 = createConf(); // Run in non-session mode so that the AM terminates TezClient tezClient1 = TezClient.create("commonName", tezConf1, false); tezClient1.start(); - DAG dag1 = createSimpleDAG("dag1", SleepProcessor.class.getName()); + DAG dag1 = createSimpleDAG("testNoSysExitOnSuccessfulDAG", SleepProcessor.class.getName()); DAGClient dagClient1 = tezClient1.submitDAG(dag1); dagClient1.waitForCompletion(); assertEquals(DAGStatus.State.SUCCEEDED, dagClient1.getDAGStatus(null).getState()); - + assertEquals(VertexStatus.State.SUCCEEDED, + dagClient1.getVertexStatus(SleepProcessor.SLEEP_VERTEX_NAME, null).getState()); // 
Sleep for more time than is required for the DAG to complete. Thread.sleep((long) (TezConstants.TEZ_DAG_SLEEP_TIME_BEFORE_EXIT * 1.5)); @@ -148,22 +213,20 @@ } @Test(timeout = 20000) - public void testNoSysExitOnFailinglDAG() throws TezException, InterruptedException, + public void testNoSysExitOnFailingDAG() throws TezException, InterruptedException, IOException { - TezConfiguration tezConf1 = new TezConfiguration(); - tezConf1.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); - tezConf1.set("fs.defaultFS", "file:///"); - tezConf1.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true); + TezConfiguration tezConf1 = createConf(); // Run in non-session mode so that the AM terminates TezClient tezClient1 = TezClient.create("commonName", tezConf1, false); tezClient1.start(); - DAG dag1 = createSimpleDAG("dag1", FailingProcessor.class.getName()); + DAG dag1 = createSimpleDAG("testNoSysExitOnFailingDAG", FailingProcessor.class.getName()); DAGClient dagClient1 = tezClient1.submitDAG(dag1); dagClient1.waitForCompletion(); assertEquals(DAGStatus.State.FAILED, dagClient1.getDAGStatus(null).getState()); - + assertEquals(VertexStatus.State.FAILED, + dagClient1.getVertexStatus(SleepProcessor.SLEEP_VERTEX_NAME, null).getState()); // Sleep for more time than is required for the DAG to complete. Thread.sleep((long) (TezConstants.TEZ_DAG_SLEEP_TIME_BEFORE_EXIT * 1.5)); @@ -198,12 +261,15 @@ public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> out } private DAG createSimpleDAG(String dagName, String processorName) { - DAG dag = DAG.create(dagName).addVertex(Vertex.create("Sleep", ProcessorDescriptor.create( - processorName).setUserPayload( - new SleepProcessor.SleepProcessorConfig(1).toUserPayload()), 1)); + DAG dag = DAG.create(generateDagName("DAG-" + dagName)).addVertex( + Vertex.create(SleepProcessor.SLEEP_VERTEX_NAME, ProcessorDescriptor.create(processorName).setUserPayload( + new SleepProcessor.SleepProcessorConfig(SLEEP_PROCESSOR_TIME_TO_SLEEP_MS).toUserPayload()), 1)); return dag; - } + private String generateDagName(String baseName) { + return baseName + (useDfs ? "_useDfs" : "") + (useLocalModeWithoutNetwork ?
"_useLocalModeWithoutNetwork" : ""); + } + @Test(timeout=30000) public void testMultiDAGsOnSession() throws IOException, TezException, InterruptedException { int dags = 2;//two dags will be submitted to session @@ -211,19 +277,16 @@ public void testMultiDAGsOnSession() throws IOException, TezException, Interrupt String[] outputPaths = new String[dags]; DAGClient[] dagClients = new DAGClient[dags]; - TezConfiguration tezConf = new TezConfiguration(); - tezConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); - tezConf.set("fs.defaultFS", "file:///"); - tezConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true); + TezConfiguration tezConf = createConf(); TezClient tezClient = TezClient.create("testMultiDAGOnSession", tezConf, true); tezClient.start(); //create inputs and outputs FileSystem fs = FileSystem.get(tezConf); for(int i = 0; i < dags; i++) { - inputPaths[i] = new Path(TEST_DIR.getAbsolutePath(),"in-"+i).toString(); + inputPaths[i] = new Path(STAGING_DIR.getAbsolutePath(), "in-" + i).toString(); createInputFile(fs, inputPaths[i]); - outputPaths[i] = new Path(TEST_DIR.getAbsolutePath(),"out-"+i).toString(); + outputPaths[i] = new Path(STAGING_DIR.getAbsolutePath(), "out-" + i).toString(); } //start testing diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestMiniTezCluster.java b/tez-tests/src/test/java/org/apache/tez/test/TestMiniTezCluster.java new file mode 100644 index 0000000000..4fda977a4c --- /dev/null +++ b/tez-tests/src/test/java/org/apache/tez/test/TestMiniTezCluster.java @@ -0,0 +1,53 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ +package org.apache.tez.test; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.junit.Assert; +import org.junit.Test; + +public class TestMiniTezCluster { + + @Test + public void testOverrideYarnDiskHealthCheck() throws IOException { + MiniTezCluster tezMiniCluster = new MiniTezCluster(TestMiniTezCluster.class.getName(), 1, 1, 1); + tezMiniCluster.init(new Configuration()); + tezMiniCluster.start(); + + // overrides if not set + Assert.assertEquals(99.0, tezMiniCluster.getConfig() + .getFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE, -1), 0.00001); + + tezMiniCluster.close(); + + tezMiniCluster = new MiniTezCluster(TestMiniTezCluster.class.getName(), 1, 1, 1); + Configuration conf = new Configuration(); + conf.setFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE, 50); + tezMiniCluster.init(conf); + tezMiniCluster.start(); + + // respects provided non-default value + Assert.assertEquals(50.0, tezMiniCluster.getConfig() + .getFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE, -1), 0.00001); + + tezMiniCluster.close(); + } +} diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestOutput.java b/tez-tests/src/test/java/org/apache/tez/test/TestOutput.java index 8b292ab196..b595743f02 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestOutput.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestOutput.java @@ -80,7 +80,7 @@ public void handleEvents(List<Event> outputEvents) { @Override public List<Event> close() throws Exception { LOG.info("Sending data movement event with value: " + output); - getContext().getCounters().findCounter(COUNTER_NAME, COUNTER_NAME).increment(1);; + getContext().getCounters().findCounter(COUNTER_NAME, COUNTER_NAME).increment(1); ByteBuffer result = ByteBuffer.allocate(4).putInt(output); result.flip(); List<Event> events = Lists.newArrayListWithCapacity(getNumPhysicalOutputs()); diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestPipelinedShuffle.java b/tez-tests/src/test/java/org/apache/tez/test/TestPipelinedShuffle.java index 36ac488b22..e44b6eb7f5 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestPipelinedShuffle.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestPipelinedShuffle.java @@ -18,7 +18,7 @@ package org.apache.tez.test; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Sets; import org.apache.commons.lang.RandomStringUtils; import org.apache.hadoop.conf.Configuration; diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestRecovery.java b/tez-tests/src/test/java/org/apache/tez/test/TestRecovery.java index 93fd972161..c7b1fb9639 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestRecovery.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestRecovery.java @@ -32,6 +32,7 @@ import java.util.Set; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -478,6 +479,9 @@ private void testHashJoinExample(SimpleShutdownCondition shutdownCondition, RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, false); tezConf.setBoolean( TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, false); + tezConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,0);
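+ // Disabling IPC connect retries lets a client talking to a killed AM fail fast, + // so the recovery test does not stall waiting on a dead address. +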
tezConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, 0); + tezConf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_TIMEOUT_KEY,1000); tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "INFO;org.apache.tez=DEBUG"); hashJoinExample.setConf(tezConf); diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestSecureShuffle.java b/tez-tests/src/test/java/org/apache/tez/test/TestSecureShuffle.java index 0fb07fc481..f9d4469779 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestSecureShuffle.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestSecureShuffle.java @@ -18,14 +18,25 @@ package org.apache.tez.test; +import static org.apache.hadoop.security.ssl.SSLFactory.SSL_CLIENT_CONF_KEY; import static org.junit.Assert.assertEquals; import java.io.BufferedWriter; import java.io.File; import java.io.IOException; import java.io.OutputStreamWriter; +import java.math.BigInteger; +import java.net.InetAddress; +import java.security.KeyPair; +import java.security.SecureRandom; +import java.security.cert.X509Certificate; import java.util.ArrayList; import java.util.Collection; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; + +import javax.security.auth.x500.X500Principal; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -36,10 +47,14 @@ import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream; import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.security.ssl.KeyStoreTestUtil; -import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.security.ssl.SSLFactory; import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.mapreduce.examples.TestOrderedWordCount; import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; +import org.bouncycastle.asn1.x509.GeneralName; +import org.bouncycastle.asn1.x509.GeneralNames; +import org.bouncycastle.asn1.x509.X509Extensions; +import org.bouncycastle.x509.X509V3CertificateGenerator; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -99,6 +114,7 @@ public static Collection getParameters() { public static void setupDFSCluster() throws Exception { conf = new Configuration(); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_EDITS_NOEDITLOGCHANNELFLUSH, false); + conf.setBoolean("fs.hdfs.impl.disable.cache", true); EditLogFileOutputStream.setShouldSkipFsyncForTesting(true); conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR); miniDFSCluster = @@ -130,8 +146,13 @@ public void setupTezCluster() throws Exception { conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT, 3 * 1000); //set to low value so that it can detect failures quickly conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_FAILURES_LIMIT, 2); + //reduce the maximum number of failed attempts per task + conf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1); conf.setLong(TezConfiguration.TEZ_AM_SLEEP_TIME_BEFORE_EXIT_MILLIS, 500); + conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_USE_ASYNC_HTTP, asyncHttp); + String sslConf = conf.get(SSL_CLIENT_CONF_KEY, "ssl-client.xml"); + conf.addResource(sslConf); miniTezCluster = new MiniTezCluster(TestSecureShuffle.class.getName() + "-" + (enableSSLInCluster ? 
"withssl" : "withoutssl"), 1, 1, 1); @@ -201,10 +222,106 @@ private static void createSampleFile(Path inputLoc) throws IOException { */ private static void setupKeyStores() throws Exception { keysStoresDir.mkdirs(); - String sslConfsDir = - KeyStoreTestUtil.getClasspathDir(TestSecureShuffle.class); + String sslConfsDir = KeyStoreTestUtil.getClasspathDir(TestSecureShuffle.class); + + setupSSLConfig(keysStoresDir.getAbsolutePath(), sslConfsDir, conf, true, true, ""); + } + + /** + * This is a copied version of hadoop's KeyStoreTestUtil.setupSSLConfig which was needed to create + * server certs with actual hostname in CN instead of "localhost". While upgrading async http + * client in TEZ-4237, it turned out that netty doesn't support custom hostname verifiers anymore + * (as discussed in https://github.com/AsyncHttpClient/async-http-client/issues/928), that's why + * it cannot be set for an async http connection. So instead of hacking an ALLOW_ALL verifier + * somehow (which cannot be propagated to netty), a valid certificate with the actual hostname + * should be generated in setupSSLConfig. So, one change is the usage of + * InetAddress.getLocalHost().getHostName(), the other is using local generateCertificate, + * which fixes another issue. + */ + public static void setupSSLConfig(String keystoresDir, String sslConfDir, Configuration config, + boolean useClientCert, boolean trustStore, String excludeCiphers) throws Exception { + String clientKS = keystoresDir + "/clientKS.jks"; + String clientPassword = "clientP"; + String serverKS = keystoresDir + "/serverKS.jks"; + String serverPassword = "serverP"; + String trustKS = null; + String trustPassword = "trustP"; + + File sslClientConfFile = new File(sslConfDir, KeyStoreTestUtil.getClientSSLConfigFileName()); + File sslServerConfFile = new File(sslConfDir, KeyStoreTestUtil.getServerSSLConfigFileName()); + + Map certs = new HashMap(); + + if (useClientCert) { + KeyPair cKP = KeyStoreTestUtil.generateKeyPair("RSA"); + X509Certificate cCert = + generateCertificate("CN=localhost, O=client", cKP, 30, "SHA1withRSA"); + KeyStoreTestUtil.createKeyStore(clientKS, clientPassword, "client", cKP.getPrivate(), cCert); + certs.put("client", cCert); + } + + String localhostName = InetAddress.getLocalHost().getHostName(); + KeyPair sKP = KeyStoreTestUtil.generateKeyPair("RSA"); + X509Certificate sCert = + generateCertificate("CN="+localhostName+", O=server", sKP, 30, "SHA1withRSA"); + KeyStoreTestUtil.createKeyStore(serverKS, serverPassword, "server", sKP.getPrivate(), sCert); + certs.put("server", sCert); + + if (trustStore) { + trustKS = keystoresDir + "/trustKS.jks"; + KeyStoreTestUtil.createTrustStore(trustKS, trustPassword, certs); + } + + Configuration clientSSLConf = KeyStoreTestUtil.createClientSSLConfig(clientKS, clientPassword, + clientPassword, trustKS, excludeCiphers); + Configuration serverSSLConf = KeyStoreTestUtil.createServerSSLConfig(serverKS, serverPassword, + serverPassword, trustKS, excludeCiphers); + + KeyStoreTestUtil.saveConfig(sslClientConfFile, clientSSLConf); + KeyStoreTestUtil.saveConfig(sslServerConfFile, serverSSLConf); + + // this will be ignored for AsyncHttpConnection, see method comments above + config.set(SSLFactory.SSL_HOSTNAME_VERIFIER_KEY, "ALLOW_ALL"); + + config.set(SSLFactory.SSL_CLIENT_CONF_KEY, sslClientConfFile.getName()); + config.set(SSLFactory.SSL_SERVER_CONF_KEY, sslServerConfFile.getName()); + config.setBoolean(SSLFactory.SSL_REQUIRE_CLIENT_CERT_KEY, useClientCert); + } + + /** + * This is a copied version of 
hadoop's KeyStoreTestUtil.generateCertificate, which takes care of setting + * the IP address as an SSL Subject Alternative Name (SAN). Without this, SSL shuffle failed with the async http client. + * Introduced by TEZ-4342. + */ + public static X509Certificate generateCertificate(String dn, KeyPair pair, int days, String algorithm) + throws Exception { + + Date from = new Date(); + Date to = new Date(from.getTime() + days * 86400000L); + BigInteger sn = new BigInteger(64, new SecureRandom()); + KeyPair keyPair = pair; + X509V3CertificateGenerator certGen = new X509V3CertificateGenerator(); + + String hostName = InetAddress.getLocalHost().getHostName(); + String hostAddress = InetAddress.getLocalHost().getHostAddress(); + certGen.addExtension(X509Extensions.SubjectAlternativeName, false, + new GeneralNames(new GeneralName[] { + new GeneralName(GeneralName.iPAddress, hostAddress), + new GeneralName(GeneralName.dNSName, hostName), + new GeneralName(GeneralName.dNSName, "localhost") + }) + ); + X500Principal dnName = new X500Principal(dn); + + certGen.setSerialNumber(sn); + certGen.setIssuerDN(dnName); + certGen.setNotBefore(from); + certGen.setNotAfter(to); + certGen.setSubjectDN(dnName); + certGen.setPublicKey(keyPair.getPublic()); + certGen.setSignatureAlgorithm(algorithm); - KeyStoreTestUtil.setupSSLConfig(keysStoresDir.getAbsolutePath(), - sslConfsDir, conf, true); + X509Certificate cert = certGen.generate(pair.getPrivate()); + return cert; } } diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestTaskErrorsUsingLocalMode.java b/tez-tests/src/test/java/org/apache/tez/test/TestTaskErrorsUsingLocalMode.java index d622698f29..d7d2dd925f 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestTaskErrorsUsingLocalMode.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestTaskErrorsUsingLocalMode.java @@ -16,6 +16,7 @@ import static org.junit.Assert.assertEquals; +import java.io.File; import java.io.IOException; import java.util.List; import java.util.Map; @@ -44,6 +45,8 @@ public class TestTaskErrorsUsingLocalMode { private static final Logger LOG = LoggerFactory.getLogger(TestTaskErrorsUsingLocalMode.class); private static final String VERTEX_NAME = "vertex1"; + private static final File STAGING_DIR = new File(System.getProperty("test.build.data"), + TestTaskErrorsUsingLocalMode.class.getName()).getAbsoluteFile(); + + @Test(timeout = 20000) @@ -123,6 +126,7 @@ private TezClient getTezClient(String name) throws IOException, TezException { TezConfiguration tezConf1 = new TezConfiguration(); tezConf1.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); tezConf1.set("fs.defaultFS", "file:///"); + tezConf1.set(TezConfiguration.TEZ_AM_STAGING_DIR, STAGING_DIR.getAbsolutePath()); tezConf1.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true); tezConf1.setLong(TezConfiguration.TEZ_AM_SLEEP_TIME_BEFORE_EXIT_MILLIS, 500); TezClient tezClient1 = TezClient.create(name, tezConf1, true); diff --git a/tez-tests/src/test/java/org/apache/tez/test/TestTezJobs.java b/tez-tests/src/test/java/org/apache/tez/test/TestTezJobs.java index 2dfc76ddff..ee717f33c0 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/TestTezJobs.java +++ b/tez-tests/src/test/java/org/apache/tez/test/TestTezJobs.java @@ -18,6 +18,10 @@ package org.apache.tez.test; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_AM_HOOKS; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_THREAD_DUMP_INTERVAL; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_TASK_ATTEMPT_HOOKS; +import static
org.apache.tez.dag.api.TezConstants.TEZ_CONTAINER_LOGGER_NAME; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -40,13 +44,16 @@ import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; -import com.google.common.base.Preconditions; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.tez.common.Preconditions; import com.google.common.collect.Lists; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.tez.common.TezContainerLogAppender; import org.apache.tez.common.counters.CounterGroup; import org.apache.tez.common.counters.TaskCounter; import org.apache.tez.common.counters.TezCounter; @@ -54,10 +61,13 @@ import org.apache.tez.dag.api.Edge; import org.apache.tez.dag.api.client.StatusGetOpts; import org.apache.tez.dag.api.client.VertexStatus; +import org.apache.tez.dag.app.ThreadDumpDAGHook; import org.apache.tez.mapreduce.examples.CartesianProduct; +import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager; import org.apache.tez.runtime.library.conf.OrderedPartitionedKVEdgeConfig; import org.apache.tez.runtime.library.partitioner.HashPartitioner; +import org.apache.tez.runtime.task.ThreadDumpTaskAttemptHook; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -138,7 +148,6 @@ public static void setup() throws IOException { if (mrrTezCluster == null) { mrrTezCluster = new MiniTezCluster(TestTezJobs.class.getName(), 1, 1, 1); - Configuration conf = new Configuration(); conf.set("fs.defaultFS", remoteFs.getUri().toString()); // use HDFS conf.setLong(TezConfiguration.TEZ_AM_SLEEP_TIME_BEFORE_EXIT_MILLIS, 500); mrrTezCluster.init(conf); @@ -163,11 +172,60 @@ public static void tearDown() { @Test(timeout = 60000) public void testHashJoinExample() throws Exception { HashJoinExample hashJoinExample = new HashJoinExample(); - hashJoinExample.setConf(mrrTezCluster.getConfig()); - Path stagingDirPath = new Path("/tmp/tez-staging-dir"); - Path inPath1 = new Path("/tmp/hashJoin/inPath1"); - Path inPath2 = new Path("/tmp/hashJoin/inPath2"); - Path outPath = new Path("/tmp/hashJoin/outPath"); + hashJoinExample.setConf(new Configuration(mrrTezCluster.getConfig())); + runHashJoinExample(hashJoinExample); + } + + @Test(timeout = 60000) + public void testHashJoinExampleWithLogPattern() throws Exception { + HashJoinExample hashJoinExample = new HashJoinExample(); + + Configuration patternConfig = new Configuration(mrrTezCluster.getConfig()); + + patternConfig.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "debug"); + patternConfig.set(TezConfiguration.TEZ_TASK_LOG_LEVEL, "debug"); + patternConfig.set(TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_AM, + "%d{ISO8601} [%p] [%t (queryId=%X{queryId} dag=%X{dagId})] |%c{2}|: %m%n"); + patternConfig.set(TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_TASK, + "%d{ISO8601} [%p] [%t (queryId=%X{queryId} dag=%X{dagId} task=%X{taskAttemptId})] |%c{2}|: %m%n"); + patternConfig.set(TezConfiguration.TEZ_MDC_CUSTOM_KEYS, "queryId"); + patternConfig.set(TezConfiguration.TEZ_MDC_CUSTOM_KEYS_CONF_PROPS, "hive.query.id"); + patternConfig.set("hive.query.id", "hello-upstream-application-12345"); + + //1. 
feature is on + //[main (queryId=hello-upstream-application-12345 dag=dag_1666683231618_0001_1)] |app.DAGAppMaster| + hashJoinExample.setConf(patternConfig); + runHashJoinExample(hashJoinExample); + + //2. feature is on, but custom keys are empty: expecting empty queryId with the same format + //[main (queryId= dag=dag_1666683231618_0002_1)] |app.DAGAppMaster| + patternConfig.set(TezConfiguration.TEZ_MDC_CUSTOM_KEYS, ""); + hashJoinExample.setConf(patternConfig); + runHashJoinExample(hashJoinExample); + + //3. feature is on, custom keys are defined but corresponding value is null in config: + //expecting empty queryId with the same format + //[main (queryId= dag=dag_1666683231618_0003_1)] |app.DAGAppMaster| + patternConfig.set(TezConfiguration.TEZ_MDC_CUSTOM_KEYS, "queryId"); + patternConfig.set(TezConfiguration.TEZ_MDC_CUSTOM_KEYS_CONF_PROPS, "hive.query.id.null"); + hashJoinExample.setConf(patternConfig); + runHashJoinExample(hashJoinExample); + + //4. feature is off: expecting to have properly formatted log lines with original log4j config (not empty string) + //[main] |app.DAGAppMaster| + patternConfig.set(TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_AM, TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_DEFAULT); + patternConfig.set(TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_TASK, TezConfiguration.TEZ_LOG_PATTERN_LAYOUT_DEFAULT); + + hashJoinExample.setConf(patternConfig); + runHashJoinExample(hashJoinExample); + } + + private void runHashJoinExample(HashJoinExample hashJoinExample) throws Exception { + int random = new Random(System.currentTimeMillis()).nextInt(10000); + Path stagingDirPath = new Path(String.format("/tmp/tez-staging-dir%d", random)); + Path inPath1 = new Path(String.format("/tmp/hashJoin%d/inPath1", random)); + Path inPath2 = new Path(String.format("/tmp/hashJoin%d/inPath2", random)); + Path outPath = new Path(String.format("/tmp/hashJoin%d/outPath", random)); remoteFs.mkdirs(inPath1); remoteFs.mkdirs(inPath2); remoteFs.mkdirs(stagingDirPath); @@ -216,10 +274,64 @@ public boolean accept(Path p) { assertEquals(0, expectedResult.size()); } + /** + * test the whole {@link HashJoinExample} pipeline as follows:
    + * {@link JoinDataGen} -> {@link HashJoinExample} -> {@link JoinValidate} + * @throws Exception + */ + @Test(timeout = 120000) + public void testHashJoinExampleWithDataViaEvent() throws Exception { + + Path testDir = new Path("/tmp/testHashJoinExampleDataViaEvent"); + Path stagingDirPath = new Path("/tmp/tez-staging-dir"); + remoteFs.mkdirs(stagingDirPath); + remoteFs.mkdirs(testDir); + + Path dataPath1 = new Path(testDir, "inPath1"); + Path dataPath2 = new Path(testDir, "inPath2"); + Path expectedOutputPath = new Path(testDir, "expectedOutputPath"); + Path outPath = new Path(testDir, "outPath"); + + TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig()); + tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirPath.toString()); + + //turn on the dataViaEvent + tezConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_TRANSFER_DATA_VIA_EVENTS_ENABLED, true); + + TezClient tezSession = null; + try { + tezSession = TezClient.create("HashJoinExampleSession", tezConf, true); + tezSession.start(); + + JoinDataGen dataGen = new JoinDataGen(); + String[] dataGenArgs = new String[] { + "-counter", + dataPath1.toString(), "1048576", dataPath2.toString(), "8", + expectedOutputPath.toString(), "2" }; + assertEquals(0, dataGen.run(tezConf, dataGenArgs, tezSession)); + + HashJoinExample joinExample = new HashJoinExample(); + String[] args = new String[] { + dataPath1.toString(), dataPath2.toString(), "1", outPath.toString(), + "doBroadcast"}; + + assertEquals(0, joinExample.run(tezConf, args, tezSession)); + + JoinValidate joinValidate = new JoinValidate(); + String[] validateArgs = new String[] { + "-counter", expectedOutputPath.toString(), outPath.toString(), "3" }; + assertEquals(0, joinValidate.run(tezConf, validateArgs, tezSession)); + } finally { + if (tezSession != null) { + tezSession.stop(); + } + } + } + @Test(timeout = 60000) public void testHashJoinExampleDisableSplitGrouping() throws Exception { HashJoinExample hashJoinExample = new HashJoinExample(); - hashJoinExample.setConf(conf); + hashJoinExample.setConf(new Configuration(mrrTezCluster.getConfig())); Path stagingDirPath = new Path(TEST_ROOT_DIR + "/tmp/tez-staging-dir"); Path inPath1 = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/inPath1"); Path inPath2 = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/inPath2"); @@ -430,8 +542,28 @@ public void testPerIOCounterAggregation() throws Exception { @Test(timeout = 60000) public void testSortMergeJoinExampleDisableSplitGrouping() throws Exception { + testSortMergeJoinExampleDisableSplitGrouping(false); + } + + @Test + public void testSortMergeJoinExampleWithThreadDump() throws Exception { + testSortMergeJoinExampleDisableSplitGrouping(true); + } + + public void testSortMergeJoinExampleDisableSplitGrouping(boolean withThreadDump) throws Exception { SortMergeJoinExample sortMergeJoinExample = new SortMergeJoinExample(); - sortMergeJoinExample.setConf(conf); + Configuration newConf = new Configuration(mrrTezCluster.getConfig()); + Path logPath = new Path(TEST_ROOT_DIR + "/tmp/sortMerge/logPath"); + if (withThreadDump) { + TezContainerLogAppender appender = new TezContainerLogAppender(); + org.apache.log4j.Logger.getRootLogger().addAppender(appender); + appender.setName(TEZ_CONTAINER_LOGGER_NAME); + appender.setContainerLogDir(logPath.toString()); + newConf.set(TEZ_AM_HOOKS, ThreadDumpDAGHook.class.getName()); + newConf.set(TEZ_TASK_ATTEMPT_HOOKS, ThreadDumpTaskAttemptHook.class.getName()); + newConf.set(TEZ_THREAD_DUMP_INTERVAL, "1ms"); + } + sortMergeJoinExample.setConf(newConf); 
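+ // With TEZ_THREAD_DUMP_INTERVAL at 1ms the hooks dump threads practically continuously, + // so .jstack files for both the AM and the task attempts should land under logPath; + // validateThreadDumpCaptured below asserts exactly that.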
Path stagingDirPath = new Path(TEST_ROOT_DIR + "/tmp/tez-staging-dir"); Path inPath1 = new Path(TEST_ROOT_DIR + "/tmp/sortMerge/inPath1"); Path inPath2 = new Path(TEST_ROOT_DIR + "/tmp/sortMerge/inPath2"); @@ -484,6 +616,29 @@ public boolean accept(Path p) { reader.close(); inStream.close(); assertEquals(0, expectedResult.size()); + + if (withThreadDump) { + validateThreadDumpCaptured(logPath); + org.apache.log4j.Logger.getRootLogger().removeAppender(TEZ_CONTAINER_LOGGER_NAME); + } + } + + private static void validateThreadDumpCaptured(Path jstackPath) throws IOException { + RemoteIterator<LocatedFileStatus> files = localFs.listFiles(jstackPath, true); + boolean appMasterDumpFound = false; + boolean tezChildDumpFound = false; + while (files.hasNext()) { + LocatedFileStatus file = files.next(); + if (file.getPath().getName().endsWith(".jstack")) { + if (file.getPath().getName().contains("attempt")) { + tezChildDumpFound = true; + } else { + appMasterDumpFound = true; + } + } + } + assertTrue(tezChildDumpFound); + assertTrue(appMasterDumpFound); } /** diff --git a/tez-tests/src/test/java/org/apache/tez/test/dag/FailingDagBuilder.java b/tez-tests/src/test/java/org/apache/tez/test/dag/FailingDagBuilder.java new file mode 100644 index 0000000000..69f7ba8112 --- /dev/null +++ b/tez-tests/src/test/java/org/apache/tez/test/dag/FailingDagBuilder.java @@ -0,0 +1,166 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.test.dag; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.tez.common.TezUtils; +import org.apache.tez.dag.api.*; +import org.apache.tez.test.TestInput; +import org.apache.tez.test.TestOutput; +import org.apache.tez.test.TestProcessor; + +import java.io.IOException; +import java.util.function.BiConsumer; + +/** + * A builder for a DAG with vertices divided into a maximum of 6 levels. + * Vertex name is "l<level>v<vertex>". Level/vertex numbers start at 1. + * Each vertex has a failing processor and failing inputs. + * The builder can accept a Tez Configuration to indicate failing patterns. + * The number of levels in the built DAG can be configured. + *
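+ * A typical invocation, mirroring its use in TestFaultTolerance ("myDag" and testConf + * stand in for the caller's own DAG name and configuration): + * new FailingDagBuilder(FailingDagBuilder.Levels.THREE).withName("myDag").withConf(testConf).build(); + *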
+ * DAG is shown with a diagram below. + * Each vertex has its degree of parallelism indicated in a bracket following its name. + * Each edge is annotated with its data movement type (s = scatter/gather, b = broadcast). + *
    + * l1v1(1) l1v2(2) l1v3(3) l1v4(2) + * |s |s |s |b + * | | | | + * l2v1(1) l2v2(3) l2v3(2) l2v4(3) + * \s /s \b |s /s + * \ / \ | / + * l3v1(4) l3v2(4) + * \s /s + * \ / + * l4v1 (10) + * /s |s \s + * / | \ + * l5v1(2) l5v2(4) l5v3(1) + * \s |s /s + * \ | / + * l6v1(4) + * + */ + +public class FailingDagBuilder { + + private final static Resource DEFAULT_RESOURCE = org.apache.hadoop.yarn.api.records.Resource.newInstance(100, 0); + + private final Levels levels; + private String name; + private Configuration conf; + + public enum Levels { + TWO("TwoLevelsFailingDAG", (dag, payload) -> { + Vertex l1v1 = Vertex.create("l1v1", TestProcessor.getProcDesc(payload), 1, DEFAULT_RESOURCE); + Vertex l2v1 = Vertex.create("l2v1", TestProcessor.getProcDesc(payload), 1, DEFAULT_RESOURCE); + addVerticesAndEdgeInternal(dag, l1v1, l2v1, EdgeProperty.DataMovementType.SCATTER_GATHER, payload); + Vertex l1v2 = Vertex.create("l1v2", TestProcessor.getProcDesc(payload), 2, DEFAULT_RESOURCE); + Vertex l2v2 = Vertex.create("l2v2", TestProcessor.getProcDesc(payload), 3, DEFAULT_RESOURCE); + addVerticesAndEdgeInternal(dag, l1v2, l2v2, EdgeProperty.DataMovementType.SCATTER_GATHER, payload); + Vertex l1v3 = Vertex.create("l1v3", TestProcessor.getProcDesc(payload), 3, DEFAULT_RESOURCE); + Vertex l2v3 = Vertex.create("l2v3", TestProcessor.getProcDesc(payload), 2, DEFAULT_RESOURCE); + addVerticesAndEdgeInternal(dag, l1v3, l2v3, EdgeProperty.DataMovementType.SCATTER_GATHER, payload); + Vertex l1v4 = Vertex.create("l1v4", TestProcessor.getProcDesc(payload), 2, DEFAULT_RESOURCE); + Vertex l2v4 = Vertex.create("l2v4", TestProcessor.getProcDesc(payload), 3, DEFAULT_RESOURCE); + addVerticesAndEdgeInternal(dag, l1v4, l2v4, EdgeProperty.DataMovementType.BROADCAST, payload); + }), + THREE("ThreeLevelsFailingDAG", (dag, payload) -> { + TWO.levelAdder.accept(dag, payload); + Vertex l3v1 = Vertex.create("l3v1", TestProcessor.getProcDesc(payload), 4, DEFAULT_RESOURCE); + dag.addVertex(l3v1); + addEdge(dag, dag.getVertex("l2v1"), l3v1, EdgeProperty.DataMovementType.SCATTER_GATHER, payload); + addEdge(dag, dag.getVertex("l2v2"), l3v1, EdgeProperty.DataMovementType.SCATTER_GATHER, payload); + Vertex l3v2 = Vertex.create("l3v2", TestProcessor.getProcDesc(payload), 4, DEFAULT_RESOURCE); + dag.addVertex(l3v2); + addEdge(dag, dag.getVertex("l2v2"), l3v2, EdgeProperty.DataMovementType.BROADCAST, payload); + addEdge(dag, dag.getVertex("l2v3"), l3v2, EdgeProperty.DataMovementType.SCATTER_GATHER, payload); + addEdge(dag, dag.getVertex("l2v4"), l3v2, EdgeProperty.DataMovementType.SCATTER_GATHER, payload); + }), + SIX("SixLevelsFailingDAG", (dag, payload) -> { + THREE.levelAdder.accept(dag, payload); + Vertex l4v1 = Vertex.create("l4v1", TestProcessor.getProcDesc(payload), 10, DEFAULT_RESOURCE); + dag.addVertex(l4v1); + addEdge(dag, dag.getVertex("l3v1"), l4v1, EdgeProperty.DataMovementType.SCATTER_GATHER, payload); + addEdge(dag, dag.getVertex("l3v2"), l4v1, EdgeProperty.DataMovementType.SCATTER_GATHER, payload); + Vertex l5v1 = Vertex.create("l5v1", TestProcessor.getProcDesc(payload), 2, DEFAULT_RESOURCE); + dag.addVertex(l5v1); + addEdge(dag, l4v1, l5v1, EdgeProperty.DataMovementType.SCATTER_GATHER, payload); + Vertex l5v2 = Vertex.create("l5v2", TestProcessor.getProcDesc(payload), 4, DEFAULT_RESOURCE); + dag.addVertex(l5v2); + addEdge(dag, l4v1, l5v2, EdgeProperty.DataMovementType.SCATTER_GATHER, payload); + Vertex l5v3 = Vertex.create("l5v3", TestProcessor.getProcDesc(payload), 1, DEFAULT_RESOURCE); + dag.addVertex(l5v3); + addEdge(dag, 
+      Vertex l6v1 = Vertex.create("l6v1", TestProcessor.getProcDesc(payload), 4, DEFAULT_RESOURCE);
+      dag.addVertex(l6v1);
+      addEdge(dag, l5v1, l6v1, EdgeProperty.DataMovementType.SCATTER_GATHER, payload);
+      addEdge(dag, l5v2, l6v1, EdgeProperty.DataMovementType.SCATTER_GATHER, payload);
+      addEdge(dag, l5v3, l6v1, EdgeProperty.DataMovementType.SCATTER_GATHER, payload);
+    });
+
+    private final String defaultName;
+    private final BiConsumer<DAG, UserPayload> levelAdder;
+
+    Levels(String defaultName, BiConsumer<DAG, UserPayload> levelAdder) {
+      this.defaultName = defaultName;
+      this.levelAdder = levelAdder;
+    }
+
+    private static void addVerticesAndEdgeInternal(
+        DAG dag, Vertex v1, Vertex v2, EdgeProperty.DataMovementType dataMovementType, UserPayload payload) {
+      dag.addVertex(v1).addVertex(v2);
+      addEdge(dag, v1, v2, dataMovementType, payload);
+    }
+
+    private static void addEdge(
+        DAG dag, Vertex v1, Vertex v2, EdgeProperty.DataMovementType dataMovementType, UserPayload payload) {
+      dag.addEdge(Edge.create(v1, v2,
+          EdgeProperty.create(dataMovementType,
+              EdgeProperty.DataSourceType.PERSISTED,
+              EdgeProperty.SchedulingType.SEQUENTIAL,
+              TestOutput.getOutputDesc(payload),
+              TestInput.getInputDesc(payload))));
+    }
+  }
+
+  public FailingDagBuilder(Levels levels) {
+    this.levels = levels;
+    this.name = levels.defaultName;
+  }
+
+  public FailingDagBuilder withConf(Configuration config) {
+    conf = config;
+    return this;
+  }
+
+  public FailingDagBuilder withName(String dagName) {
+    name = dagName;
+    return this;
+  }
+
+  public DAG build() throws IOException {
+    UserPayload payload = UserPayload.create(null);
+    if (conf != null) {
+      payload = TezUtils.createUserPayloadFromConf(conf);
+    }
+
+    DAG dag = DAG.create(name);
+
+    levels.levelAdder.accept(dag, payload);
+
+    return dag;
+  }
+}
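
For context, FailingDagBuilder consolidates the three static helper classes removed further down
(TwoLevelsFailingDAG, ThreeLevelsFailingDAG, SixLevelsFailingDAG) into one fluent builder. A minimal
usage sketch follows; the DAG name is hypothetical, and withConf() is optional exactly as build()
implies (a null Configuration falls back to an empty UserPayload):

    // Build the three-level failing-DAG topology for a test run.
    Configuration conf = new Configuration();
    DAG dag = new FailingDagBuilder(FailingDagBuilder.Levels.THREE)
        .withName("MyFailingDag")   // hypothetical; defaults to "ThreeLevelsFailingDAG"
        .withConf(conf)             // optional; encodes the failing patterns for TestProcessor/TestInput
        .build();
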
diff --git a/tez-tests/src/test/java/org/apache/tez/test/dag/MultiAttemptDAG.java b/tez-tests/src/test/java/org/apache/tez/test/dag/MultiAttemptDAG.java
index cdf69e69b2..f2d02727be 100644
--- a/tez-tests/src/test/java/org/apache/tez/test/dag/MultiAttemptDAG.java
+++ b/tez-tests/src/test/java/org/apache/tez/test/dag/MultiAttemptDAG.java
@@ -66,7 +66,7 @@
 import java.util.List;
 import java.util.concurrent.atomic.AtomicInteger;
 
-public class MultiAttemptDAG {
+public final class MultiAttemptDAG {
 
   private static final Logger LOG = LoggerFactory.getLogger(MultiAttemptDAG.class);
 
@@ -80,9 +80,11 @@ public class MultiAttemptDAG {
       "tez.multi-attempt-dag.use-failing-committer";
   public static boolean MULTI_ATTEMPT_DAG_USE_FAILING_COMMITTER_DEFAULT = false;
 
+  private MultiAttemptDAG() {}
+
   public static class FailOnAttemptVertexManagerPlugin extends VertexManagerPlugin {
     private int numSourceTasks = 0;
-    private AtomicInteger numCompletions = new AtomicInteger();
+    private final AtomicInteger numCompletions = new AtomicInteger();
     private boolean tasksScheduled = false;
 
     public FailOnAttemptVertexManagerPlugin(VertexManagerPluginContext context) {
@@ -114,7 +116,7 @@ private synchronized void maybeScheduleTasks() {
         && !tasksScheduled) {
       tasksScheduled = true;
       String payload = new String(getContext().getUserPayload().deepCopyAsArray());
-      int successAttemptId = Integer.valueOf(payload);
+      int successAttemptId = Integer.parseInt(payload);
       LOG.info("Checking whether to crash AM or schedule tasks"
           + ", vertex: " + getContext().getVertexName()
           + ", successfulAttemptID=" + successAttemptId
@@ -150,7 +152,7 @@ public void onVertexManagerEventReceived(VertexManagerEvent vmEvent) {
 
   @Override
   public void onRootVertexInitialized(String inputName, InputDescriptor inputDescriptor, List events) {
-    List inputInfoEvents = new ArrayList();
+    List inputInfoEvents = new ArrayList<>();
     for (Event event: events) {
       if (event instanceof InputDataInformationEvent) {
         inputInfoEvents.add((InputDataInformationEvent)event);
@@ -178,12 +180,12 @@ public void initialize() throws Exception {
     }
 
     @Override
-    public void setupOutput() throws Exception {
+    public void setupOutput() {
 
     }
 
     @Override
-    public void commitOutput() throws Exception {
+    public void commitOutput() {
       if (failOnCommit) {
         LOG.info("Committer causing AM to shutdown");
         Runtime.getRuntime().halt(-1);
@@ -191,7 +193,7 @@ public void commitOutput() {
     }
 
     @Override
-    public void abortOutput(State finalState) throws Exception {
+    public void abortOutput(State finalState) {
 
     }
 
@@ -212,11 +214,7 @@ public byte[] toUserPayload() {
     public void fromUserPayload(byte[] userPayload) {
       int failInt = Ints.fromByteArray(userPayload);
-      if (failInt == 0) {
-        failOnCommit = false;
-      } else {
-        failOnCommit = true;
-      }
+      failOnCommit = failInt != 0;
     }
   }
 }
@@ -229,14 +227,13 @@ public TestRootInputInitializer(InputInitializerContext initializerContext) {
 
   @Override
   public List initialize() throws Exception {
-    List events = new ArrayList();
+    List events = new ArrayList<>();
     events.add(InputDataInformationEvent.createWithSerializedPayload(0, ByteBuffer.allocate(0)));
     return events;
   }
 
   @Override
-  public void handleInputInitializerEvent(List events)
-      throws Exception {
+  public void handleInputInitializerEvent(List events) {
     throw new UnsupportedOperationException("Not supported");
   }
 }
@@ -250,7 +247,7 @@ public FailingInputInitializer(InputInitializerContext initializerContext) {
 
   @Override
   public List initialize() throws Exception {
     try {
-      Thread.sleep(2000l);
+      Thread.sleep(2000L);
     } catch (InterruptedException e) {
       // Ignore
     }
@@ -262,8 +259,7 @@ public List initialize() throws Exception {
   }
 
   @Override
-  public void handleInputInitializerEvent(List events) throws
-      Exception {
+  public void handleInputInitializerEvent(List events) {
     throw new UnsupportedOperationException("Not supported");
   }
 }
@@ -276,7 +272,7 @@ public NoOpInput(InputContext inputContext, int numPhysicalInputs) {
 
   @Override
   public List initialize() throws Exception {
-    getContext().requestInitialMemory(1l, new MemoryUpdateCallback() {
+    getContext().requestInitialMemory(1L, new MemoryUpdateCallback() {
       @Override
       public void memoryAssigned(long assignedSize) {}
     });
@@ -289,12 +285,12 @@ public void start() throws Exception {
   }
 
   @Override
-  public Reader getReader() throws Exception {
+  public Reader getReader() {
     return null;
   }
 
   @Override
-  public void handleEvents(List inputEvents) throws Exception {
+  public void handleEvents(List inputEvents) {
 
   }
 
@@ -313,7 +309,7 @@ public NoOpOutput(OutputContext outputContext,
 
   @Override
   public List initialize() throws Exception {
-    getContext().requestInitialMemory(1l, new MemoryUpdateCallback() {
+    getContext().requestInitialMemory(1L, new MemoryUpdateCallback() {
       @Override
       public void memoryAssigned(long assignedSize) {
       }
@@ -327,7 +323,7 @@ public void start() throws Exception {
   }
 
   @Override
-  public Writer getWriter() throws Exception {
+  public Writer getWriter() {
     return null;
   }
 
@@ -361,13 +357,13 @@ public static DAG createDAG(String name,
     // Make each vertex manager fail on appropriate attempt
     v1.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(
         FailOnAttemptVertexManagerPlugin.class.getName())
-
.setUserPayload(UserPayload.create(ByteBuffer.wrap(new String("1").getBytes())))); + .setUserPayload(UserPayload.create(ByteBuffer.wrap("1".getBytes())))); v2.setVertexManagerPlugin(VertexManagerPluginDescriptor.create( FailOnAttemptVertexManagerPlugin.class.getName()) - .setUserPayload(UserPayload.create(ByteBuffer.wrap(new String("2").getBytes())))); + .setUserPayload(UserPayload.create(ByteBuffer.wrap("2".getBytes())))); v3.setVertexManagerPlugin(VertexManagerPluginDescriptor.create( FailOnAttemptVertexManagerPlugin.class.getName()) - .setUserPayload(UserPayload.create(ByteBuffer.wrap(new String("3").getBytes())))); + .setUserPayload(UserPayload.create(ByteBuffer.wrap("3".getBytes())))); dag.addVertex(v1).addVertex(v2).addVertex(v3); dag.addEdge(Edge.create(v1, v2, EdgeProperty.create(DataMovementType.SCATTER_GATHER, diff --git a/tez-tests/src/test/java/org/apache/tez/test/dag/SimpleReverseVTestDAG.java b/tez-tests/src/test/java/org/apache/tez/test/dag/SimpleReverseVTestDAG.java index 4c8771cdbe..29d4d0b978 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/dag/SimpleReverseVTestDAG.java +++ b/tez-tests/src/test/java/org/apache/tez/test/dag/SimpleReverseVTestDAG.java @@ -40,11 +40,13 @@ * v2 v3 * */ -public class SimpleReverseVTestDAG { +public final class SimpleReverseVTestDAG { static Resource defaultResource = Resource.newInstance(100, 0); public static String TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS = "tez.simple-reverse-v-test-dag.num-tasks"; public static int TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT = 2; + + private SimpleReverseVTestDAG() {} public static DAG createDAG(String name, Configuration conf) throws Exception { diff --git a/tez-tests/src/test/java/org/apache/tez/test/dag/SimpleVTestDAG.java b/tez-tests/src/test/java/org/apache/tez/test/dag/SimpleVTestDAG.java index a4eb95e059..c6e815fa52 100644 --- a/tez-tests/src/test/java/org/apache/tez/test/dag/SimpleVTestDAG.java +++ b/tez-tests/src/test/java/org/apache/tez/test/dag/SimpleVTestDAG.java @@ -40,11 +40,13 @@ * v3 * */ -public class SimpleVTestDAG { +public final class SimpleVTestDAG { static Resource defaultResource = Resource.newInstance(100, 0); public static String TEZ_SIMPLE_V_DAG_NUM_TASKS = "tez.simple-v-test-dag.num-tasks"; public static int TEZ_SIMPLE_V_DAG_NUM_TASKS_DEFAULT = 2; + + private SimpleVTestDAG() {} public static DAG createDAG(String name, Configuration conf) throws Exception { diff --git a/tez-tests/src/test/java/org/apache/tez/test/dag/SixLevelsFailingDAG.java b/tez-tests/src/test/java/org/apache/tez/test/dag/SixLevelsFailingDAG.java deleted file mode 100644 index 036bedfdb4..0000000000 --- a/tez-tests/src/test/java/org/apache/tez/test/dag/SixLevelsFailingDAG.java +++ /dev/null @@ -1,95 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.tez.test.dag; -import org.apache.hadoop.conf.Configuration; -import org.apache.tez.common.TezUtils; -import org.apache.tez.dag.api.DAG; -import org.apache.tez.dag.api.Vertex; -import org.apache.tez.dag.api.EdgeProperty.DataMovementType; -import org.apache.tez.test.TestProcessor; - -/** - * A DAG with vertices divided into 6 levels. - * Vertex name is "lv". Level/vertex numbers start at 1. - * Each vertex has failing processor and failing inputs. The constructor can accept Tez Configuration to indicate failing patterns. - * - * DAG is shown with a diagram below. - * Each vertex has its degree of parallelism indicated in a bracket following its name. - * Each edge annotates with data movement (s = scatter/gather, b = broadcast) - * - * l1v1(1) l1v2(2) l1v3(3) l1v4(2) - * |s |s |s |b - * | | | | - * l2v1(1) l2v2(3) l2v3(2) l2v4(3) - * \s /s \b |s /s - * \ / \ | / - * l3v1(4) l3v2(4) - * \s /s - * \ / - * l4v1 (10) - * /s |s \s - * / | \ - * l5v1(2) l5v2(4) l5v3(1) - * \s |s /s - * \ | / - * l6v1(4) - * - */ -public class SixLevelsFailingDAG extends ThreeLevelsFailingDAG { - - protected static Vertex l4v1; - protected static Vertex l5v1, l5v2, l5v3; - protected static Vertex l6v1; - - protected static void addDAGVerticesAndEdges() { - ThreeLevelsFailingDAG.addDAGVerticesAndEdges(); - l4v1 = Vertex.create("l4v1", TestProcessor.getProcDesc(payload), 10, defaultResource); - dag.addVertex(l4v1); - addEdge(l3v1, l4v1, DataMovementType.SCATTER_GATHER); - addEdge(l3v2, l4v1, DataMovementType.SCATTER_GATHER); - l5v1 = Vertex.create("l5v1", TestProcessor.getProcDesc(payload), 2, defaultResource); - dag.addVertex(l5v1); - addEdge(l4v1, l5v1, DataMovementType.SCATTER_GATHER); - l5v2 = Vertex.create("l5v2", TestProcessor.getProcDesc(payload), 4, defaultResource); - dag.addVertex(l5v2); - addEdge(l4v1, l5v2, DataMovementType.SCATTER_GATHER); - l5v3 = Vertex.create("l5v3", TestProcessor.getProcDesc(payload), 1, defaultResource); - dag.addVertex(l5v3); - addEdge(l4v1, l5v3, DataMovementType.SCATTER_GATHER); - l6v1 = Vertex.create("l6v1", TestProcessor.getProcDesc(payload), 4, defaultResource); - dag.addVertex(l6v1); - addEdge(l5v1, l6v1, DataMovementType.SCATTER_GATHER); - addEdge(l5v2, l6v1, DataMovementType.SCATTER_GATHER); - addEdge(l5v3, l6v1, DataMovementType.SCATTER_GATHER); - } - - public static DAG createDAG(String name, - Configuration conf) throws Exception { - if (conf != null) { - payload = TezUtils.createUserPayloadFromConf(conf); - } - dag = DAG.create(name); - addDAGVerticesAndEdges(); - return dag; - } - - public static DAG createDAG(Configuration conf) throws Exception { - return createDAG("SixLevelsFailingDAG", conf); - } -} diff --git a/tez-tests/src/test/java/org/apache/tez/test/dag/ThreeLevelsFailingDAG.java b/tez-tests/src/test/java/org/apache/tez/test/dag/ThreeLevelsFailingDAG.java deleted file mode 100644 index 7f2e4f8ecd..0000000000 --- a/tez-tests/src/test/java/org/apache/tez/test/dag/ThreeLevelsFailingDAG.java +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tez.test.dag; -import org.apache.hadoop.conf.Configuration; -import org.apache.tez.common.TezUtils; -import org.apache.tez.dag.api.DAG; -import org.apache.tez.dag.api.Vertex; -import org.apache.tez.dag.api.EdgeProperty.DataMovementType; -import org.apache.tez.test.TestProcessor; - -/** - * A DAG with vertices divided into 3 levels. - * Vertex name is "lv". Level/vertex numbers start at 1. - * Each vertex has failing processor and failing inputs. The constructor can accept Tez Configuration to indicate failing patterns. - * - * DAG is shown with a diagram below. - * Each vertex has its degree of parallelism indicated in a bracket following its name. - * Each edge annotates with data movement (s = scatter/gather, b = broadcast) - * - * l1v1(1) l1v2(2) l1v3(3) l1v4(2) - * |s |s |s |b - * | | | | - * l2v1(1) l2v2(3) l2v3(2) l2v4(3) - * \s /s \b |s /s - * \ / \ | / - * l3v1(4) l3v2(4) - * - */ -public class ThreeLevelsFailingDAG extends TwoLevelsFailingDAG { - - protected static Vertex l3v1, l3v2; - - protected static void addDAGVerticesAndEdges() { - TwoLevelsFailingDAG.addDAGVerticesAndEdges(); - l3v1 = Vertex.create("l3v1", TestProcessor.getProcDesc(payload), 4, defaultResource); - dag.addVertex(l3v1); - addEdge(l2v1, l3v1, DataMovementType.SCATTER_GATHER); - addEdge(l2v2, l3v1, DataMovementType.SCATTER_GATHER); - l3v2 = Vertex.create("l3v2", TestProcessor.getProcDesc(payload), 4, defaultResource); - dag.addVertex(l3v2); - addEdge(l2v2, l3v2, DataMovementType.BROADCAST); - addEdge(l2v3, l3v2, DataMovementType.SCATTER_GATHER); - addEdge(l2v4, l3v2, DataMovementType.SCATTER_GATHER); - } - - public static DAG createDAG(String name, - Configuration conf) throws Exception { - if (conf != null) { - payload = TezUtils.createUserPayloadFromConf(conf); - } - dag = DAG.create(name); - addDAGVerticesAndEdges(); - return dag; - } - - public static DAG createDAG(Configuration conf) throws Exception { - return createDAG("ThreeLevelsFailingDAG", conf); - } -} diff --git a/tez-tests/src/test/java/org/apache/tez/test/dag/TwoLevelsFailingDAG.java b/tez-tests/src/test/java/org/apache/tez/test/dag/TwoLevelsFailingDAG.java deleted file mode 100644 index 151e3855e1..0000000000 --- a/tez-tests/src/test/java/org/apache/tez/test/dag/TwoLevelsFailingDAG.java +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tez.test.dag; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.tez.common.TezUtils; -import org.apache.tez.dag.api.DAG; -import org.apache.tez.dag.api.Edge; -import org.apache.tez.dag.api.EdgeProperty; -import org.apache.tez.dag.api.UserPayload; -import org.apache.tez.dag.api.Vertex; -import org.apache.tez.dag.api.EdgeProperty.DataMovementType; -import org.apache.tez.dag.api.EdgeProperty.DataSourceType; -import org.apache.tez.dag.api.EdgeProperty.SchedulingType; -import org.apache.tez.test.TestInput; -import org.apache.tez.test.TestOutput; -import org.apache.tez.test.TestProcessor; - -/** - * A DAG with vertices divided into 2 levels. - * Vertex name is "lv". Level/vertex numbers start at 1. - * Each vertex has failing processor and failing inputs. The constructor can accept Tez Configuration to indicate failing patterns. - * - * DAG is shown with a diagram below. - * Each vertex has its degree of parallelism indicated in a bracket following its name. - * Each edge annotates with data movement (s = scatter/gather, b = broadcast) - * - * l1v1(1) l1v2(2) l1v3(3) l1v4(2) - * |s |s |s |b - * | | | | - * l2v1(1) l2v2(3) l2v3(2) l2v4(3) - * - */ -public class TwoLevelsFailingDAG { - static Resource defaultResource = Resource.newInstance(100, 0); - protected static DAG dag; - protected static UserPayload payload = UserPayload.create(null); - protected static Vertex l1v1, l1v2, l1v3, l1v4; - protected static Vertex l2v1, l2v2, l2v3, l2v4; - - public static DAG createDAG(String name, - Configuration conf) throws Exception { - if (conf != null) { - payload = TezUtils.createUserPayloadFromConf(conf); - } - dag = DAG.create(name); - addDAGVerticesAndEdges(); - return dag; - } - - protected static void addDAGVerticesAndEdges() { - l1v1 = Vertex.create("l1v1", TestProcessor.getProcDesc(payload), 1, defaultResource); - l2v1 = Vertex.create("l2v1", TestProcessor.getProcDesc(payload), 1, defaultResource); - addVerticesAndEdgeInternal(l1v1, l2v1, DataMovementType.SCATTER_GATHER); - l1v2 = Vertex.create("l1v2", TestProcessor.getProcDesc(payload), 2, defaultResource); - l2v2 = Vertex.create("l2v2", TestProcessor.getProcDesc(payload), 3, defaultResource); - addVerticesAndEdgeInternal(l1v2, l2v2, DataMovementType.SCATTER_GATHER); - l1v3 = Vertex.create("l1v3", TestProcessor.getProcDesc(payload), 3, defaultResource); - l2v3 = Vertex.create("l2v3", TestProcessor.getProcDesc(payload), 2, defaultResource); - addVerticesAndEdgeInternal(l1v3, l2v3, DataMovementType.SCATTER_GATHER); - l1v4 = Vertex.create("l1v4", TestProcessor.getProcDesc(payload), 2, defaultResource); - l2v4 = Vertex.create("l2v4", TestProcessor.getProcDesc(payload), 3, defaultResource); - addVerticesAndEdgeInternal(l1v4, l2v4, DataMovementType.BROADCAST); - } - - /** - * Adds 2 vertices and an edge connecting them. - * Given two vertices must not exist. - * - * @param v1 vertice 1 - * @param v2 vertice 2 - * @param dataMovementType Data movement type - */ - protected static void addVerticesAndEdgeInternal(Vertex v1, Vertex v2, DataMovementType dataMovementType) { - dag.addVertex(v1).addVertex(v2); - addEdge(v1, v2, dataMovementType); - } - - /** - * Adds an edge to given 2 vertices. 
- * @param v1 vertice 1 - * @param v2 vertice 2 - * @param dataMovementType Data movement type - */ - protected static void addEdge(Vertex v1, Vertex v2, DataMovementType dataMovementType) { - dag.addEdge(Edge.create(v1, v2, - EdgeProperty.create(dataMovementType, - DataSourceType.PERSISTED, - SchedulingType.SEQUENTIAL, - TestOutput.getOutputDesc(payload), - TestInput.getInputDesc(payload)))); - } - - public static DAG createDAG(Configuration conf) throws Exception { - return createDAG("TwoLevelsFailingDAG", conf); - } -} diff --git a/tez-tests/src/test/resources/META-INF/LICENSE.txt b/tez-tests/src/test/resources/META-INF/LICENSE similarity index 100% rename from tez-tests/src/test/resources/META-INF/LICENSE.txt rename to tez-tests/src/test/resources/META-INF/LICENSE diff --git a/tez-tests/src/test/resources/META-INF/NOTICE b/tez-tests/src/test/resources/META-INF/NOTICE new file mode 100644 index 0000000000..2595905699 --- /dev/null +++ b/tez-tests/src/test/resources/META-INF/NOTICE @@ -0,0 +1,6 @@ +Apache Tez +Copyright 2014-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + diff --git a/tez-tests/src/test/resources/META-INF/NOTICE.txt b/tez-tests/src/test/resources/META-INF/NOTICE.txt deleted file mode 100644 index 3f36fcc6ba..0000000000 --- a/tez-tests/src/test/resources/META-INF/NOTICE.txt +++ /dev/null @@ -1,6 +0,0 @@ -Apache Tez -Copyright (c) 2016 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - diff --git a/tez-tools/analyzers/job-analyzer/pom.xml b/tez-tools/analyzers/job-analyzer/pom.xml index bca2a19065..f3e215f951 100644 --- a/tez-tools/analyzers/job-analyzer/pom.xml +++ b/tez-tools/analyzers/job-analyzer/pom.xml @@ -20,7 +20,7 @@ org.apache.tez tez-perf-analyzer - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-job-analyzer @@ -46,6 +46,14 @@ org.apache.tez tez-dag + + org.apache.tez + hadoop-shim + + + org.apache.tez + tez-runtime-library + org.apache.tez tez-tests @@ -136,7 +144,7 @@ org.mockito - mockito-all + mockito-core test @@ -148,6 +156,10 @@ com.sun.jersey jersey-json + + org.apache.hadoop + hadoop-mapreduce-client-shuffle + diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/Analyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/Analyzer.java index 6021c5897e..1f0a7ad62c 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/Analyzer.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/Analyzer.java @@ -18,7 +18,6 @@ package org.apache.tez.analyzer; -import org.apache.hadoop.conf.Configuration; import org.apache.tez.dag.api.TezException; import org.apache.tez.history.parser.datamodel.DagInfo; @@ -54,11 +53,4 @@ public interface Analyzer { * @return description of analyzer */ public String getDescription(); - - /** - * Get config properties related to this analyzer - * - * @return config related to analyzer - */ - public Configuration getConfiguration(); } diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java index 5246c68ee3..1144f7643c 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/CSVResult.java @@ -19,7 +19,7 @@ 
package org.apache.tez.analyzer; import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; @@ -32,8 +32,11 @@ import java.io.OutputStreamWriter; import java.nio.charset.Charset; import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; import java.util.Iterator; import java.util.List; +import java.util.Objects; /** * Simple placeholder for storing CSV results. @@ -55,7 +58,7 @@ public String[] getHeaders() { } public void addRecord(String[] record) { - Preconditions.checkArgument(record != null, "Record can't be null"); + Objects.requireNonNull(record, "Record cannot be null"); Preconditions.checkArgument(record.length == headers.length, "Record length" + record.length + " does not match headers length " + headers.length); recordsList.add(record); @@ -65,6 +68,10 @@ public Iterator getRecordsIterator() { return Iterators.unmodifiableIterator(recordsList.iterator()); } + @SuppressWarnings({ "rawtypes", "unchecked" }) + public void sort(Comparator comparator) { + Collections.sort(recordsList, comparator); + } public void setComments(String comments) { this.comments = comments; diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/AnalyzerDriver.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/AnalyzerDriver.java index cf600c5618..9eda46294e 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/AnalyzerDriver.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/AnalyzerDriver.java @@ -20,7 +20,9 @@ import org.apache.hadoop.util.ProgramDriver; -public class AnalyzerDriver { +public final class AnalyzerDriver { + + private AnalyzerDriver() {} public static void main(String argv[]){ int exitCode = -1; @@ -46,12 +48,20 @@ public static void main(String argv[]){ "Print spill details in a DAG"); pgd.addClass("TaskAssignmentAnalyzer", TaskAssignmentAnalyzer.class, "Print task-to-node assignment details of a DAG"); + pgd.addClass("TaskAttemptResultStatisticsAnalyzer", TaskAttemptResultStatisticsAnalyzer.class, + "Print vertex:node:status level details of task attempt results"); + pgd.addClass("InputReadErrorAnalyzer", InputReadErrorAnalyzer.class, + "Print INPUT_READ_ERROR sources"); pgd.addClass("TaskConcurrencyAnalyzer", TaskConcurrencyAnalyzer.class, "Print the task concurrency details in a DAG"); pgd.addClass("VertexLevelCriticalPathAnalyzer", VertexLevelCriticalPathAnalyzer.class, "Find critical path at vertex level in a DAG"); pgd.addClass("OneOnOneEdgeAnalyzer", OneOnOneEdgeAnalyzer.class, "Find out schedule misses in 1:1 edges in a DAG"); + pgd.addClass("DagOverviewAnalyzer", DagOverviewAnalyzer.class, + "Print basic dag information (dag/vertex events)"); + pgd.addClass("TaskHangAnalyzer", HungTaskAnalyzer.class, + "Print all vertices/tasks and their last attempts with status/duration/node"); exitCode = pgd.run(argv); } catch(Throwable e){ e.printStackTrace(); diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/ContainerReuseAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/ContainerReuseAnalyzer.java index 5b862f87f7..553ff0e2cc 100644 --- 
a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/ContainerReuseAnalyzer.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/ContainerReuseAnalyzer.java @@ -39,15 +39,13 @@ */ public class ContainerReuseAnalyzer extends TezAnalyzerBase implements Analyzer { - private final Configuration config; - private static final String[] headers = { "vertexName", "taskAttempts", "node", "containerId", "reuseCount" }; private final CSVResult csvResult; public ContainerReuseAnalyzer(Configuration config) { - this.config = config; + super(config); this.csvResult = new CSVResult(headers); } @@ -82,11 +80,6 @@ public String getDescription() { return "Get details on container reuse analysis"; } - @Override - public Configuration getConfiguration() { - return config; - } - public static void main(String[] args) throws Exception { Configuration config = new Configuration(); ContainerReuseAnalyzer analyzer = new ContainerReuseAnalyzer(config); diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java index 2edce3ed4e..3f5e3004b8 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/CriticalPathAnalyzer.java @@ -26,7 +26,6 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.util.ToolRunner; import org.apache.tez.analyzer.Analyzer; import org.apache.tez.analyzer.CSVResult; @@ -43,7 +42,7 @@ import org.apache.tez.history.parser.datamodel.TaskInfo; import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -54,8 +53,8 @@ public class CriticalPathAnalyzer extends TezAnalyzerBase implements Analyzer { private static final Logger LOG = LoggerFactory.getLogger(CriticalPathAnalyzer.class); - String succeededState = StringInterner.weakIntern(TaskAttemptState.SUCCEEDED.name()); - String failedState = StringInterner.weakIntern(TaskAttemptState.FAILED.name()); + private static final String SUCCEEDED_STATE = TaskAttemptState.SUCCEEDED.name(); + private static final String FAILED_STATE = TaskAttemptState.FAILED.name(); public enum CriticalPathDependency { DATA_DEPENDENCY, @@ -114,10 +113,11 @@ public List getNotes() { ArrayList concurrencyByTime = Lists.newArrayList(); public CriticalPathAnalyzer() { + super(new Configuration()); } public CriticalPathAnalyzer(Configuration conf) { - setConf(conf); + super(conf); } @Override @@ -130,8 +130,8 @@ public void analyze(DagInfo dagInfo) throws TezException { for (TaskInfo task : vertex.getTasks()) { for (TaskAttemptInfo attempt : task.getTaskAttempts()) { attempts.put(attempt.getTaskAttemptId(), attempt); - if (attempt.getStatus().equals(succeededState) || - attempt.getStatus().equals(failedState)) { + if (attempt.getStatus().equals(SUCCEEDED_STATE) || + attempt.getStatus().equals(FAILED_STATE)) { if (lastAttemptFinishTime < attempt.getFinishTime()) { lastAttempt = attempt; lastAttemptFinishTime = attempt.getFinishTime(); @@ -644,13 +644,9 @@ public String getDescription() { return "Analyze critical path of the DAG"; } - 
@Override - public Configuration getConfiguration() { - return getConf(); - } - public static void main(String[] args) throws Exception { - int res = ToolRunner.run(new Configuration(), new CriticalPathAnalyzer(), args); + Configuration config = new Configuration(); + int res = ToolRunner.run(config, new CriticalPathAnalyzer(config), args); System.exit(res); } diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/DagOverviewAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/DagOverviewAnalyzer.java new file mode 100644 index 0000000000..b193c30a90 --- /dev/null +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/DagOverviewAnalyzer.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.analyzer.plugins; + +import java.text.SimpleDateFormat; +import java.util.Comparator; +import java.util.Date; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ToolRunner; +import org.apache.tez.analyzer.Analyzer; +import org.apache.tez.analyzer.CSVResult; +import org.apache.tez.analyzer.Result; +import org.apache.tez.dag.api.TezException; +import org.apache.tez.history.parser.datamodel.DagInfo; +import org.apache.tez.history.parser.datamodel.Event; +import org.apache.tez.history.parser.datamodel.TaskAttemptInfo; +import org.apache.tez.history.parser.datamodel.TaskInfo; +import org.apache.tez.history.parser.datamodel.VertexInfo; + +public class DagOverviewAnalyzer extends TezAnalyzerBase implements Analyzer { + private final String[] headers = + { "name", "id", "event_type", "status", "event_time", "event_time_str", "vertex_task_stats", "diagnostics" }; + private final CSVResult csvResult; + private static final SimpleDateFormat FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); + + public DagOverviewAnalyzer(Configuration config) { + super(config); + csvResult = new CSVResult(headers); + } + + @Override + public void analyze(DagInfo dagInfo) throws TezException { + for (Event event : dagInfo.getEvents()) { + csvResult.addRecord(new String[] { dagInfo.getDagId(), dagInfo.getDagId(), event.getType(), + dagInfo.getStatus(), Long.toString(event.getTime()), toDateStr(event.getTime()), "", "" }); + } + for (VertexInfo vertex : dagInfo.getVertices()) { + for (Event event : vertex.getEvents()) { + String vertexFailureInfoIfAny = ""; + for (TaskAttemptInfo attempt : vertex.getTaskAttempts()) { + if (attempt.getStatus().contains("FAILED")) { + vertexFailureInfoIfAny = attempt.getTaskAttemptId() + ": " + + attempt.getDiagnostics().replaceAll(",", " ").replaceAll("\n", " "); + break; + } + } + csvResult.addRecord(new String[] { vertex.getVertexName(), vertex.getVertexId(), + event.getType(), vertex.getStatus(), Long.toString(event.getTime()), + toDateStr(event.getTime()), getTaskStats(vertex), vertexFailureInfoIfAny }); + } + + // a failed task can lead to dag failure, so hopefully holds valuable information + for (TaskInfo failedTask : vertex.getFailedTasks()) { + for (Event failedTaskEvent : failedTask.getEvents()) { + if (failedTaskEvent.getType().equalsIgnoreCase("TASK_FINISHED")) { + csvResult.addRecord(new String[] { vertex.getVertexName(), failedTask.getTaskId(), + failedTaskEvent.getType(), failedTask.getStatus(), Long.toString(failedTaskEvent.getTime()), + toDateStr(failedTaskEvent.getTime()), getTaskStats(vertex), + failedTask.getDiagnostics().replaceAll(",", " ").replaceAll("\n", " ") }); + } + } + // if we already found a failing task, let's scan the failing attempts as well + for (TaskAttemptInfo failedAttempt : failedTask.getFailedTaskAttempts()) { + for (Event failedTaskAttemptEvent : failedAttempt.getEvents()) { + if (failedTaskAttemptEvent.getType().equalsIgnoreCase("TASK_ATTEMPT_FINISHED")) { + csvResult.addRecord(new String[] { vertex.getVertexName(), + failedAttempt.getTaskAttemptId(), failedTaskAttemptEvent.getType(), + failedAttempt.getStatus(), 
Long.toString(failedTaskAttemptEvent.getTime()),
+                toDateStr(failedTaskAttemptEvent.getTime()), getTaskStats(vertex),
+                failedAttempt.getDiagnostics().replaceAll(",", " ").replaceAll("\n", " ") });
+            }
+          }
+        }
+      }
+    }
+
+    csvResult.sort(new Comparator<String[]>() {
+      public int compare(String[] first, String[] second) {
+        return Long.compare(Long.parseLong(first[4]), Long.parseLong(second[4]));
+      }
+    });
+  }
+
+  private String getTaskStats(VertexInfo vertex) {
+    return String.format("numTasks: %d failedTasks: %d completedTasks: %d", vertex.getNumTasks(),
+        vertex.getFailedTasksCount(), vertex.getCompletedTasksCount());
+  }
+
+  private static synchronized String toDateStr(long time) {
+    return FORMAT.format(new Date(time));
+  }
+
+  @Override
+  public Result getResult() throws TezException {
+    return csvResult;
+  }
+
+  @Override
+  public String getName() {
+    return "Dag overview analyzer";
+  }
+
+  @Override
+  public String getDescription() {
+    return "High level dag events overview (dag, vertex event summary)."
+        + " Helps understand the overall progress of a dag by simply listing the dag/vertex related events";
+  }
+
+  public static void main(String[] args) throws Exception {
+    Configuration config = new Configuration();
+    DagOverviewAnalyzer analyzer = new DagOverviewAnalyzer(config);
+    int res = ToolRunner.run(config, analyzer, args);
+    analyzer.printResults();
+    System.exit(res);
+  }
+}
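
The comparator above goes through CSVResult.sort(Comparator), the hook this patch adds to CSVResult.
A minimal sketch of sorting records by a numeric column (the column index is illustrative);
Long.compare is preferred over the subtract-and-cast-to-int idiom, which can overflow or truncate:

    // Sort CSV records ascending by the epoch-millis column at index 4.
    csvResult.sort(new Comparator<String[]>() {
      @Override
      public int compare(String[] first, String[] second) {
        return Long.compare(Long.parseLong(first[4]), Long.parseLong(second[4]));
      }
    });
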

diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/HungTaskAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/HungTaskAnalyzer.java
new file mode 100644
index 0000000000..9a38e28ba2
--- /dev/null
+++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/HungTaskAnalyzer.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.analyzer.plugins;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.tez.analyzer.Analyzer;
+import org.apache.tez.analyzer.CSVResult;
+import org.apache.tez.analyzer.Result;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.dag.records.TezTaskAttemptID;
+import org.apache.tez.history.parser.datamodel.DagInfo;
+import org.apache.tez.history.parser.datamodel.TaskAttemptInfo;
+import org.apache.tez.history.parser.datamodel.VertexInfo;
+
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Gives insights into hanging task attempts by providing details about the last attempt of every task.
+ */
+public class HungTaskAnalyzer extends TezAnalyzerBase implements Analyzer {
+  private final String[] headers = { "vertex", "task", "number_of_attempts", "last_attempt_id",
+      "last_attempt_status", "last_attempt_duration_ms", "last_attempt_node" };
+  private final CSVResult csvResult;
+
+  private static final String HEADER_NUM_ATTEMPTS = "num_attempts";
+  private static final String HEADER_LAST_ATTEMPT_ID_AND_STATUS = "last_attempt_id_and_status";
+  private static final String HEADER_LAST_ATTEMPT_STATUS = "last_attempt_status";
+  private static final String HEADER_LAST_ATTEMPT_NODE = "last_attempt_node";
+  private static final String HEADER_LAST_ATTEMPT_DURATION_MS = "last_attempt_duration_ms";
+
+  public HungTaskAnalyzer(Configuration config) {
+    super(config);
+    csvResult = new CSVResult(headers);
+  }
+
+  @Override
+  public void analyze(DagInfo dagInfo) throws TezException {
+    Map<String, Map<String, String>> taskData = new HashMap<>(); // per-task detail map, keyed by task id
+    for (VertexInfo vertex : dagInfo.getVertices()) {
+      taskData.clear();
+      for (TaskAttemptInfo attempt : vertex.getTaskAttempts()) {
+        String taskId = attempt.getTaskInfo().getTaskId();
+
+        int numAttemptsForTask = attempt.getTaskInfo().getNumberOfTaskAttempts();
+        Map<String, String> thisTaskData = taskData.get(taskId);
+
+        if (thisTaskData == null) {
+          thisTaskData = new HashMap<>();
+          thisTaskData.put(HEADER_NUM_ATTEMPTS, Integer.toString(numAttemptsForTask));
+          taskData.put(taskId, thisTaskData);
+        }
+
+        int attemptNumber = TezTaskAttemptID.fromString(attempt.getTaskAttemptId()).getId();
+        if (attemptNumber == numAttemptsForTask - 1) {
+          thisTaskData.put(HEADER_LAST_ATTEMPT_ID_AND_STATUS,
+              String.format("%s/%s", attempt.getTaskAttemptId(), attempt.getStatus()));
+          thisTaskData.put(HEADER_LAST_ATTEMPT_STATUS, attempt.getDetailedStatus());
+          thisTaskData.put(HEADER_LAST_ATTEMPT_NODE, attempt.getNodeId());
+
+          thisTaskData.put(HEADER_LAST_ATTEMPT_DURATION_MS,
+              (attempt.getFinishTime() == 0 || attempt.getStartTime() == 0) ? "-1"
+                  : Long.toString(attempt.getFinishTime() - attempt.getStartTime()));
+        }
+      }
+      for (Map.Entry<String, Map<String, String>> task : taskData.entrySet()) {
+        addARecord(vertex.getVertexName(), task.getKey(), task.getValue().get(HEADER_NUM_ATTEMPTS),
+            task.getValue().get(HEADER_LAST_ATTEMPT_ID_AND_STATUS), task.getValue().get(HEADER_LAST_ATTEMPT_STATUS),
+            task.getValue().get(HEADER_LAST_ATTEMPT_DURATION_MS),
+            task.getValue().get(HEADER_LAST_ATTEMPT_NODE));
+      }
+    }
+
+    csvResult.sort(new Comparator<String[]>() {
+      public int compare(String[] first, String[] second) {
+        int vertexOrder = first[0].compareTo(second[0]);
+        int lastAttemptStatusOrder =
+            (first[4] == null || second[4] == null) ? 0 : first[4].compareTo(second[4]);
+        int attemptNumberOrder = Integer.valueOf(second[2]).compareTo(Integer.valueOf(first[2]));
+
+        return vertexOrder == 0
+            ? (lastAttemptStatusOrder == 0 ? attemptNumberOrder : lastAttemptStatusOrder)
+            : vertexOrder;
+      }
+    });
+  }
+
+  private void addARecord(String vertexName, String taskId, String numAttempts,
+      String lastAttemptId, String lastAttemptStatus, String lastAttemptDuration,
+      String lastAttemptNode) {
+    String[] record = new String[7];
+    record[0] = vertexName;
+    record[1] = taskId;
+    record[2] = numAttempts;
+    record[3] = lastAttemptId;
+    record[4] = lastAttemptStatus;
+    record[5] = lastAttemptDuration;
+    record[6] = lastAttemptNode;
+
+    csvResult.addRecord(record);
+  }
+
+  @Override
+  public Result getResult() throws TezException {
+    return csvResult;
+  }
+
+  @Override
+  public String getName() {
+    return "Hung Task Analyzer";
+  }
+
+  @Override
+  public String getDescription() {
+    return "TaskHangAnalyzer can give quick insights about hanging task attempts"
+        + " by giving an overview of all tasks and their last attempts' status, duration, etc.";
+  }
+
+  public static void main(String[] args) throws Exception {
+    Configuration config = new Configuration();
+    HungTaskAnalyzer analyzer = new HungTaskAnalyzer(config);
+    int res = ToolRunner.run(config, analyzer, args);
+    analyzer.printResults();
+    System.exit(res);
+  }
+}

diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/InputReadErrorAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/InputReadErrorAnalyzer.java
new file mode 100644
index 0000000000..3cb523ff9a
--- /dev/null
+++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/InputReadErrorAnalyzer.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.analyzer.plugins;
+
+import java.util.Comparator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.tez.analyzer.Analyzer;
+import org.apache.tez.analyzer.CSVResult;
+import org.apache.tez.analyzer.Result;
+import org.apache.tez.dag.api.TezException;
+import org.apache.tez.history.parser.datamodel.DagInfo;
+import org.apache.tez.history.parser.datamodel.Event;
+import org.apache.tez.history.parser.datamodel.TaskAttemptInfo;
+import org.apache.tez.history.parser.datamodel.VertexInfo;
+
+/**
+ * Helps find the root cause of shuffle errors, e.g. which node(s) can be blamed for them.
+ */
+public class InputReadErrorAnalyzer extends TezAnalyzerBase implements Analyzer {
+  private final String[] headers = { "vertex:attempt", "status", "time", "node", "diagnostics" };
+  private final CSVResult csvResult;
+
+  public InputReadErrorAnalyzer(Configuration config) {
+    super(config);
+    csvResult = new CSVResult(headers);
+  }
+
+  @Override
+  public void analyze(DagInfo dagInfo) throws TezException {
+    for (VertexInfo vertex : dagInfo.getVertices()) {
+      for (TaskAttemptInfo attempt : vertex.getTaskAttempts()) {
+        String terminationCause = attempt.getTerminationCause();
+        if ("INPUT_READ_ERROR".equalsIgnoreCase(terminationCause)
+            || "OUTPUT_LOST".equalsIgnoreCase(terminationCause)
+            || "NODE_FAILED".equalsIgnoreCase(terminationCause)) {
+          for (Event event : attempt.getEvents()) {
+            if (event.getType().equalsIgnoreCase("TASK_ATTEMPT_FINISHED")) {
+              csvResult.addRecord(new String[] {
+                  vertex.getVertexName() + ":" + attempt.getTaskAttemptId(),
+                  attempt.getDetailedStatus(), String.valueOf(event.getTime()), attempt.getNodeId(),
+                  attempt.getDiagnostics().replaceAll(",", " ").replaceAll("\n", " ") });
+            }
+          }
+        }
+      }
+    }
+
+    csvResult.sort(new Comparator<String[]>() {
+      public int compare(String[] first, String[] second) {
+        return Long.compare(Long.parseLong(second[2]), Long.parseLong(first[2]));
+      }
+    });
+  }
+
+  @Override
+  public Result getResult() throws TezException {
+    return csvResult;
+  }
+
+  @Override
+  public String getName() {
+    return "Input read error analyzer";
+  }
+
+  @Override
+  public String getDescription() {
+    return "Prints every task attempt (with its node) that is related to input read errors";
+  }
+
+  public static void main(String[] args) throws Exception {
+    Configuration config = new Configuration();
+    InputReadErrorAnalyzer analyzer = new InputReadErrorAnalyzer(config);
+    int res = ToolRunner.run(config, analyzer, args);
+    analyzer.printResults();
+    System.exit(res);
+  }
+}
diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/LocalityAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/LocalityAnalyzer.java
index ec72df17ac..d640704f93 100644
--- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/LocalityAnalyzer.java
+++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/LocalityAnalyzer.java
@@ -52,12 +52,10 @@ public class LocalityAnalyzer extends TezAnalyzerBase implements Analyzer {
 
   private static final String DATA_LOCAL_RATIO =
"tez.locality-analyzer.data.local.ratio"; private static final float DATA_LOCAL_RATIO_DEFAULT = 0.5f; - private final Configuration config; - private final CSVResult csvResult; public LocalityAnalyzer(Configuration config) { - this.config = config; + super(config); csvResult = new CSVResult(headers); } @@ -119,7 +117,7 @@ public void analyze(DagInfo dagInfo) throws TezException { record.add(otherTaskResult.avgHDFSBytesRead + ""); String recommendation = ""; - if (dataLocalRatio < config.getFloat(DATA_LOCAL_RATIO, DATA_LOCAL_RATIO_DEFAULT)) { + if (dataLocalRatio < getConf().getFloat(DATA_LOCAL_RATIO, DATA_LOCAL_RATIO_DEFAULT)) { recommendation = "Data locality is poor for this vertex. Try tuning " + TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS + ", " + TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED + ", " @@ -182,10 +180,6 @@ private TaskAttemptDetails computeAverages(VertexInfo vertexInfo, DAGCounter cou return "Analyze for locality information (data local, rack local, off-rack)"; } - @Override public Configuration getConfiguration() { - return config; - } - /** * Placeholder for task attempt details */ diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/OneOnOneEdgeAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/OneOnOneEdgeAnalyzer.java index 2ba715ed43..a6cb3f1511 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/OneOnOneEdgeAnalyzer.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/OneOnOneEdgeAnalyzer.java @@ -55,12 +55,10 @@ public class OneOnOneEdgeAnalyzer extends TezAnalyzerBase implements Analyzer { // DataMovementType::ONE_TO_ONE private static final String ONE_TO_ONE = "ONE_TO_ONE"; - private final Configuration config; - private final CSVResult csvResult; public OneOnOneEdgeAnalyzer(Configuration config) { - this.config = config; + super(config); csvResult = new CSVResult(headers); } @@ -140,11 +138,6 @@ public String getDescription() { return "To understand the locality miss in 1:1 edge"; } - @Override - public Configuration getConfiguration() { - return config; - } - public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); OneOnOneEdgeAnalyzer analyzer = new OneOnOneEdgeAnalyzer(conf); diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/ShuffleTimeAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/ShuffleTimeAnalyzer.java index 57e91c62d3..f8f9112bb7 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/ShuffleTimeAnalyzer.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/ShuffleTimeAnalyzer.java @@ -66,14 +66,12 @@ public class ShuffleTimeAnalyzer extends TezAnalyzerBase implements Analyzer { private final CSVResult csvResult = new CSVResult(headers); - private final Configuration config; - private final float realWorkDoneRatio; private final long minShuffleRecords; public ShuffleTimeAnalyzer(Configuration config) { - this.config = config; + super(config); realWorkDoneRatio = config.getFloat (REAL_WORK_DONE_RATIO, REAL_WORK_DONE_RATIO_DEFAULT); @@ -208,11 +206,6 @@ public String getDescription() { + "and the real work done in the task"; } - @Override - public Configuration getConfiguration() { - return config; - } - public static void main(String[] args) throws 
Exception { Configuration config = new Configuration(); ShuffleTimeAnalyzer analyzer = new ShuffleTimeAnalyzer(config); diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SkewAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SkewAnalyzer.java index 067d871954..b0387d1973 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SkewAnalyzer.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SkewAnalyzer.java @@ -18,7 +18,6 @@ package org.apache.tez.analyzer.plugins; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.hadoop.conf.Configuration; @@ -34,6 +33,7 @@ import java.util.List; import java.util.Map; +import java.util.Objects; /** @@ -85,14 +85,12 @@ public class SkewAnalyzer extends TezAnalyzerBase implements Analyzer { private final CSVResult csvResult = new CSVResult(headers); - private final Configuration config; - private final float minRatio; private final float maxRatio; private final long maxShuffleBytesPerSource; public SkewAnalyzer(Configuration config) { - this.config = config; + super(config); maxRatio = config.getFloat(ATTEMPT_SHUFFLE_KEY_GROUP_MAX_RATIO, ATTEMPT_SHUFFLE_KEY_GROUP_MAX_RATIO_DEFAULT); minRatio = config.getFloat(ATTEMPT_SHUFFLE_KEY_GROUP_MIN_RATIO, @@ -103,8 +101,7 @@ public SkewAnalyzer(Configuration config) { @Override public void analyze(DagInfo dagInfo) throws TezException { - Preconditions.checkArgument(dagInfo != null, "DAG can't be null"); - analyzeReducers(dagInfo); + analyzeReducers(Objects.requireNonNull(dagInfo, "DAG can't be null")); } private void analyzeReducers(DagInfo dagInfo) { @@ -214,7 +211,7 @@ private void analyzeRecordSkewPerSource(TaskAttemptInfo attemptInfo) { if (vertexNumTasks > 1) { if (ratio > maxRatio) { //input records > 60% of vertex level record count - if (inputRecordsCount > (vertexLevelInputRecordsCount * 0.60)) { + if (inputRecordsCount > (vertexLevelInputRecordsCount * 0.6f)) { List result = Lists.newLinkedList(); result.add(attemptInfo.getTaskInfo().getVertexInfo().getVertexName()); result.add(attemptInfo.getTaskAttemptId()); @@ -305,12 +302,7 @@ public String getName() { @Override public String getDescription() { - return "Analyzer reducer skews by mining reducer task counters"; - } - - @Override - public Configuration getConfiguration() { - return null; + return "Analyze reducer skews by mining reducer task counters"; } public static void main(String[] args) throws Exception { diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowNodeAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowNodeAnalyzer.java index a810a8a645..9e573c2033 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowNodeAnalyzer.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowNodeAnalyzer.java @@ -59,10 +59,8 @@ public class SlowNodeAnalyzer extends TezAnalyzerBase implements Analyzer { private final CSVResult csvResult = new CSVResult(headers); - private final Configuration config; - public SlowNodeAnalyzer(Configuration config) { - this.config = config; + super(config); } @Override @@ -182,11 +180,6 @@ public String getDescription() { return sb.toString(); } - @Override - public Configuration getConfiguration() { - return config; - } - public static void main(String[] 
args) throws Exception { Configuration config = new Configuration(); SlowNodeAnalyzer analyzer = new SlowNodeAnalyzer(config); diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowTaskIdentifier.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowTaskIdentifier.java index d2474ad0f6..7c9958b250 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowTaskIdentifier.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowTaskIdentifier.java @@ -51,10 +51,8 @@ public class SlowTaskIdentifier extends TezAnalyzerBase implements Analyzer { private static final String NO_OF_TASKS = "tez.slow-task-analyzer.task.count"; private static final int NO_OF_TASKS_DEFAULT = 100; - private final Configuration config; - public SlowTaskIdentifier(Configuration config) { - this.config = config; + super(config); this.csvResult = new CSVResult(headers); } @@ -75,7 +73,7 @@ public void analyze(DagInfo dagInfo) throws TezException { }); int limit = Math.min(taskAttempts.size(), - Math.max(0, config.getInt(NO_OF_TASKS, NO_OF_TASKS_DEFAULT))); + Math.max(0, getConf().getInt(NO_OF_TASKS, NO_OF_TASKS_DEFAULT))); if (limit == 0) { return; @@ -111,11 +109,6 @@ public String getDescription() { return "Identifies slow tasks in the DAG"; } - @Override - public Configuration getConfiguration() { - return config; - } - public static void main(String[] args) throws Exception { Configuration config = new Configuration(); SlowTaskIdentifier analyzer = new SlowTaskIdentifier(config); diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowestVertexAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowestVertexAnalyzer.java index 33f2421699..74fff0ee02 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowestVertexAnalyzer.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SlowestVertexAnalyzer.java @@ -49,7 +49,6 @@ public class SlowestVertexAnalyzer extends TezAnalyzerBase implements Analyzer { private final CSVResult csvResult = new CSVResult(headers); - private final Configuration config; private final MetricRegistry metrics = new MetricRegistry(); private Histogram taskAttemptRuntimeHistorgram; @@ -59,7 +58,7 @@ public class SlowestVertexAnalyzer extends TezAnalyzerBase implements Analyzer { private final long vertexRuntimeThreshold; public SlowestVertexAnalyzer(Configuration config) { - this.config = config; + super(config); this.vertexRuntimeThreshold = Math.max(1, config.getLong(MAX_VERTEX_RUNTIME, MAX_VERTEX_RUNTIME_DEFAULT)); @@ -144,7 +143,7 @@ public void analyze(DagInfo dagInfo) throws TezException { record.add(shuffleMaxSource); record.add(Math.max(0, slowestLastEventTime) + ""); record.add(maxSourceName); - //Finding out real_work done at vertex level might be meaningless (as it is quite posisble + //Finding out real_work done at vertex level might be meaningless (as it is quite possible // that it went to starvation). 
StringBuilder sb = new StringBuilder(); @@ -204,11 +203,6 @@ public String getDescription() { return "Identify the slowest vertex in the DAG, which needs to be looked into first"; } - @Override - public Configuration getConfiguration() { - return config; - } - public static void main(String[] args) throws Exception { Configuration config = new Configuration(); SlowestVertexAnalyzer analyzer = new SlowestVertexAnalyzer(config); diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SpillAnalyzerImpl.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SpillAnalyzerImpl.java index d69ca23b5a..f0ce205418 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SpillAnalyzerImpl.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/SpillAnalyzerImpl.java @@ -52,7 +52,7 @@ public class SpillAnalyzerImpl extends TezAnalyzerBase implements Analyzer { private final CSVResult csvResult; /** - * Minimum output bytes that should be chunrned out by a task + * Minimum output bytes that should be churned out by a task */ private static final String OUTPUT_BYTES_THRESHOLD = "tez.spill-analyzer.min.output.bytes" + ".threshold"; @@ -60,10 +60,8 @@ public class SpillAnalyzerImpl extends TezAnalyzerBase implements Analyzer { private final long minOutputBytesPerTask; - private final Configuration config; - public SpillAnalyzerImpl(Configuration config) { - this.config = config; + super(config); minOutputBytesPerTask = Math.max(0, config.getLong(OUTPUT_BYTES_THRESHOLD, OUTPUT_BYTES_THRESHOLD_DEFAULT)); this.csvResult = new CSVResult(headers); @@ -130,11 +128,6 @@ public String getDescription() { return "Analyze spill details in the task"; } - @Override - public Configuration getConfiguration() { - return config; - } - public static void main(String[] args) throws Exception { Configuration config = new Configuration(); SpillAnalyzerImpl analyzer = new SpillAnalyzerImpl(config); diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskAssignmentAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskAssignmentAnalyzer.java index ce6fa417c1..02b821f367 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskAssignmentAnalyzer.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskAssignmentAnalyzer.java @@ -36,29 +36,27 @@ */ public class TaskAssignmentAnalyzer extends TezAnalyzerBase implements Analyzer { - private final String[] headers = { "vertex", "node", "numTasks", "load" }; - private final Configuration config; + private final String[] headers = { "vertex", "node", "numTaskAttempts", "load" }; private final CSVResult csvResult; public TaskAssignmentAnalyzer(Configuration config) { - this.config = config; + super(config); csvResult = new CSVResult(headers); } @Override public void analyze(DagInfo dagInfo) throws TezException { - Map map = new HashMap<>(); + Map taskAttemptsPerNode = new HashMap<>(); for (VertexInfo vertex : dagInfo.getVertices()) { - map.clear(); + taskAttemptsPerNode.clear(); for (TaskAttemptInfo attempt : vertex.getTaskAttempts()) { - Integer previousValue = map.get(attempt.getNodeId()); - map.put(attempt.getNodeId(), - previousValue == null ? 
1 : previousValue + 1); + Integer previousValue = taskAttemptsPerNode.get(attempt.getNodeId()); + taskAttemptsPerNode.put(attempt.getNodeId(), previousValue == null ? 1 : previousValue + 1); } - double mean = vertex.getTaskAttempts().size() / Math.max(1.0, map.size()); - for (Map.Entry assignment : map.entrySet()) { - addARecord(vertex.getVertexName(), assignment.getKey(), - assignment.getValue(), assignment.getValue() * 100 / mean); + double mean = vertex.getTaskAttempts().size() / Math.max(1.0, taskAttemptsPerNode.size()); + for (Map.Entry assignment : taskAttemptsPerNode.entrySet()) { + addARecord(vertex.getVertexName(), assignment.getKey(), assignment.getValue(), + assignment.getValue() * 100 / mean); } } } @@ -88,11 +86,6 @@ public String getDescription() { return "Get the Task assignments on different nodes of the cluster"; } - @Override - public Configuration getConfiguration() { - return config; - } - public static void main(String[] args) throws Exception { Configuration config = new Configuration(); TaskAssignmentAnalyzer analyzer = new TaskAssignmentAnalyzer(config); diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskAttemptResultStatisticsAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskAttemptResultStatisticsAnalyzer.java new file mode 100644 index 0000000000..cf6b2f0d8e --- /dev/null +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskAttemptResultStatisticsAnalyzer.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.analyzer.plugins; + +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ToolRunner; +import org.apache.tez.analyzer.Analyzer; +import org.apache.tez.analyzer.CSVResult; +import org.apache.tez.analyzer.Result; +import org.apache.tez.dag.api.TezException; +import org.apache.tez.history.parser.datamodel.DagInfo; +import org.apache.tez.history.parser.datamodel.TaskAttemptInfo; +import org.apache.tez.history.parser.datamodel.VertexInfo; + +/** + * Get simple count of task attempt states on vertex:node:status level, like below. + * + * vertex (+task stats: all/succeeded/failed/killed),node,status,numAttempts + * Map 1 (vertex_x_y_z) (216/153/0/63),node1,KILLED:INTERNAL_PREEMPTION,1185 + * Map 1 (vertex_x_y_z) (216/153/0/63),node1,KILLED:TERMINATED_AT_SHUTDOWN,22 + * Map 1 (vertex_x_y_z) (216/153/0/63),node1,KILLED:EXTERNAL_PREEMPTION,3349 + * Map 1 (vertex_x_y_z) (216/153/0/63),node1,SUCCEEDED,1 + */ +public class TaskAttemptResultStatisticsAnalyzer extends TezAnalyzerBase implements Analyzer { + private final String[] headers = + { "vertex (+task stats: all/succeeded/failed/killed)", "node", "status", "numAttempts" }; + private final CSVResult csvResult; + + public TaskAttemptResultStatisticsAnalyzer(Configuration config) { + super(config); + csvResult = new CSVResult(headers); + } + + @Override + public void analyze(DagInfo dagInfo) throws TezException { + Map map = new HashMap<>(); + + for (VertexInfo vertex : dagInfo.getVertices()) { + String taskStatsInVertex = + String.format("%s/%s/%s/%s", vertex.getNumTasks(), vertex.getSucceededTasksCount(), + vertex.getFailedTasksCount(), vertex.getKilledTasksCount()); + for (TaskAttemptInfo attempt : vertex.getTaskAttempts()) { + String key = String.format("%s#%s#%s", + String.format("%s (%s) (%s)", vertex.getVertexName(), vertex.getVertexId(), + taskStatsInVertex), + attempt.getNodeId(), attempt.getDetailedStatus()); + Integer previousValue = (Integer) map.get(key); + map.put(key, previousValue == null ? 1 : previousValue + 1); + } + } + + map.forEach((key, value) -> { + String[] keys = key.split("#"); + addARecord(keys[0], keys[1], keys.length > 2 ? keys[2] : "", value); + }); + + csvResult.sort(new Comparator() { + public int compare(String[] first, String[] second) { + int vertexOrder = first[0].compareTo(second[0]); + int nodeOrder = first[1].compareTo(second[1]); + int statusOrder = first[2].compareTo(second[2]); + + return vertexOrder == 0 ? (nodeOrder == 0 ? 
statusOrder : nodeOrder) : vertexOrder; + } + }); + } + + private void addARecord(String vertexData, String node, String status, + int numAttempts) { + String[] record = new String[4]; + record[0] = vertexData; + record[1] = node; + record[2] = status; + record[3] = Integer.toString(numAttempts); + csvResult.addRecord(record); + } + + @Override + public Result getResult() throws TezException { + return csvResult; + } + + @Override + public String getName() { + return "Task Attempt Result Statistics Analyzer"; + } + + @Override + public String getDescription() { + return "Get statistics about task attempts states in vertex:node:status level"; + } + + public static void main(String[] args) throws Exception { + Configuration config = new Configuration(); + TaskAttemptResultStatisticsAnalyzer analyzer = new TaskAttemptResultStatisticsAnalyzer(config); + int res = ToolRunner.run(config, analyzer, args); + analyzer.printResults(); + System.exit(res); + } +} diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskConcurrencyAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskConcurrencyAnalyzer.java index 72f3b36a5b..91f51b4c21 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskConcurrencyAnalyzer.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TaskConcurrencyAnalyzer.java @@ -41,11 +41,10 @@ public class TaskConcurrencyAnalyzer extends TezAnalyzerBase implements Analyzer private static final String[] headers = { "time", "vertexName", "concurrentTasksRunning" }; private final CSVResult csvResult; - private final Configuration config; public TaskConcurrencyAnalyzer(Configuration conf) { + super(conf); this.csvResult = new CSVResult(headers); - this.config = conf; } private enum EventType {START, FINISH} @@ -153,11 +152,6 @@ public String getDescription() { + "would be helpful in understanding whether any starvation was there or not."; } - @Override - public Configuration getConfiguration() { - return config; - } - public static void main(String[] args) throws Exception { Configuration config = new Configuration(); TaskConcurrencyAnalyzer analyzer = new TaskConcurrencyAnalyzer(config); diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TezAnalyzerBase.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TezAnalyzerBase.java index 1549de9f84..705c6e9cfb 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TezAnalyzerBase.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/TezAnalyzerBase.java @@ -19,7 +19,12 @@ package org.apache.tez.analyzer.plugins; import java.io.File; +import java.io.FilenameFilter; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; @@ -27,6 +32,8 @@ import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.Tool; @@ -36,10 +43,11 @@ import org.apache.tez.dag.api.TezException; import org.apache.tez.history.ATSImportTool; import 
org.apache.tez.history.parser.ATSFileParser; +import org.apache.tez.history.parser.ProtoHistoryParser; import org.apache.tez.history.parser.SimpleHistoryParser; import org.apache.tez.history.parser.datamodel.DagInfo; -import com.google.common.base.Preconditions; +import org.apache.tez.common.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,6 +60,7 @@ public abstract class TezAnalyzerBase extends Configured implements Tool, Analyz private static final String SAVE_RESULTS = "saveResults"; private static final String DAG_ID = "dagId"; private static final String FROM_SIMPLE_HISTORY = "fromSimpleHistory"; + private static final String FROM_PROTO_HISTORY = "fromProtoHistory"; private static final String HELP = "help"; private static final int SEPARATOR_WIDTH = 80; @@ -59,7 +68,11 @@ public abstract class TezAnalyzerBase extends Configured implements Tool, Analyz private String outputDir; private boolean saveResults = false; - + + public TezAnalyzerBase(Configuration config) { + setConf(config); + } + @SuppressWarnings("static-access") private static Options buildOptions() { Option dagIdOption = OptionBuilder.withArgName(DAG_ID).withLongOpt(DAG_ID) @@ -81,7 +94,12 @@ private static Options buildOptions() { (FROM_SIMPLE_HISTORY) .withDescription("Event data from Simple History logging. Must also specify event file") .isRequired(false).create(); - + + Option fromProtoHistoryOption = + OptionBuilder.withArgName(FROM_PROTO_HISTORY).withLongOpt(FROM_PROTO_HISTORY) + .withDescription("Event data from Proto History logging. Must also specify event file") + .isRequired(false).create(); + Option help = OptionBuilder.withArgName(HELP).withLongOpt (HELP) .withDescription("print help") @@ -93,6 +111,7 @@ private static Options buildOptions() { opts.addOption(saveResults); opts.addOption(eventFileNameOption); opts.addOption(fromSimpleHistoryOption); + opts.addOption(fromProtoHistoryOption); opts.addOption(help); return opts; } @@ -133,21 +152,36 @@ public int run(String[] args) throws Exception { outputDir = System.getProperty("user.dir"); } - File file = null; + String dagId = cmdLine.getOptionValue(DAG_ID); + + List files = new ArrayList(); if (cmdLine.hasOption(EVENT_FILE_NAME)) { - file = new File(cmdLine.getOptionValue(EVENT_FILE_NAME)); + for (String file : cmdLine.getOptionValue(EVENT_FILE_NAME).split(",")) { + File fileOrDir = new File(file); + if (fileOrDir.exists()) { + if (fileOrDir.isFile()) { + files.add(fileOrDir); + } else { + files.addAll(collectFilesForDagId(fileOrDir, dagId)); + } + } + } } - - String dagId = cmdLine.getOptionValue(DAG_ID); - + DagInfo dagInfo = null; - if (file == null) { + if (files.isEmpty()) { if (cmdLine.hasOption(FROM_SIMPLE_HISTORY)) { System.err.println("Event file name must be specified when using simple history"); printUsage(); return -2; } + if (cmdLine.hasOption(FROM_PROTO_HISTORY)) { + System.err.println("Proto file name must be specified when using proto history"); + printUsage(); + return -2; + } + // using ATS - try to download directly String[] importArgs = { "--dagId=" + dagId, "--downloadDir=" + outputDir }; @@ -159,30 +193,60 @@ public int run(String[] args) throws Exception { //Parse ATS data and verify results //Parse downloaded contents - file = new File(outputDir - + Path.SEPARATOR + dagId + ".zip"); + files.add(new File(outputDir + + Path.SEPARATOR + dagId + ".zip")); } - Preconditions.checkState(file != null); - if (!cmdLine.hasOption(FROM_SIMPLE_HISTORY)) { - ATSFileParser parser = new ATSFileParser(file); + 
Preconditions.checkState(!files.isEmpty()); + if (cmdLine.hasOption(FROM_SIMPLE_HISTORY)) { + SimpleHistoryParser parser = new SimpleHistoryParser(files); + dagInfo = parser.getDAGData(dagId); + } else if (cmdLine.hasOption(FROM_PROTO_HISTORY)) { + ProtoHistoryParser parser = new ProtoHistoryParser(files); dagInfo = parser.getDAGData(dagId); } else { - SimpleHistoryParser parser = new SimpleHistoryParser(file); + ATSFileParser parser = new ATSFileParser(files); dagInfo = parser.getDAGData(dagId); } Preconditions.checkState(dagInfo.getDagId().equals(dagId)); analyze(dagInfo); Result result = getResult(); - if (saveResults && (result instanceof CSVResult)) { - String fileName = outputDir + File.separator - + this.getClass().getName() + "_" + dagInfo.getDagId() + ".csv"; - ((CSVResult) result).dumpToFile(fileName); - LOG.info("Saved results in " + fileName); + + if (saveResults) { + String dagInfoFileName = outputDir + File.separator + this.getClass().getName() + "_" + + dagInfo.getDagId() + ".dag"; + FileUtils.writeStringToFile(new File(dagInfoFileName), dagInfo.toExtendedString()); + LOG.info("Saved dag info in " + dagInfoFileName); + + if (result instanceof CSVResult) { + String fileName = outputDir + File.separator + this.getClass().getName() + "_" + + dagInfo.getDagId() + ".csv"; + ((CSVResult) result).dumpToFile(fileName); + LOG.info("Saved results in " + fileName); + } } + return 0; } + private List collectFilesForDagId(File parentDir, String dagId) { + File[] arrFiles = parentDir.listFiles(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + return name.contains(dagId); + } + }); + if (arrFiles == null || arrFiles.length == 0) { + throw new RuntimeException( + String.format("cannot find relevant files for dag: '%s' in dir: %s", dagId, parentDir)); + } + + List files = Arrays.asList(arrFiles); + LOG.info("collected files for dag: \n" + + files.stream().map(f -> "\n" + f.getAbsolutePath()).collect(Collectors.toList())); + return files; + } + public void printResults() throws TezException { Result result = getResult(); if (result instanceof CSVResult) { diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/VertexLevelCriticalPathAnalyzer.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/VertexLevelCriticalPathAnalyzer.java index 06b8983e9d..78a4d41f38 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/VertexLevelCriticalPathAnalyzer.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/plugins/VertexLevelCriticalPathAnalyzer.java @@ -44,8 +44,6 @@ * Identify a set of vertices which fall in the critical path in a DAG. 
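 * For example, the analyzer emits ranked rows such as "Map 1 --> Reducer 2 --> Reducer 3" in the
 * CriticalPath column (vertex names and the path itself are illustrative, joined with the
 * CONNECTOR constant below), with a Score column used to order the candidate paths by time.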
*/ public class VertexLevelCriticalPathAnalyzer extends TezAnalyzerBase implements Analyzer { - private final Configuration config; - private static final String[] headers = { "CriticalPath", "Score" }; private final CSVResult csvResult; @@ -58,7 +56,7 @@ public class VertexLevelCriticalPathAnalyzer extends TezAnalyzerBase implements private static final String CONNECTOR = "-->"; public VertexLevelCriticalPathAnalyzer(Configuration config) { - this.config = config; + super(config); this.csvResult = new CSVResult(headers); this.dotFileLocation = config.get(DOT_FILE_DIR, DOT_FILE_DIR_DEFAULT); } @@ -105,11 +103,6 @@ public String getDescription() { return "Analyze vertex level critical path of the DAG"; } - @Override - public Configuration getConfiguration() { - return config; - } - private static Map sortByValues(Map result) { //Sort result by time in reverse order final Ordering reversValueOrdering = diff --git a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/utils/SVGUtils.java b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/utils/SVGUtils.java index 90acf3b0a0..b1689ff829 100644 --- a/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/utils/SVGUtils.java +++ b/tez-tools/analyzers/job-analyzer/src/main/java/org/apache/tez/analyzer/utils/SVGUtils.java @@ -41,7 +41,7 @@ public class SVGUtils { private static int MAX_DAG_RUNTIME = 0; private static final int SCREEN_WIDTH = 1800; - public SVGUtils() { + public SVGUtils() { } private int Y_MAX; @@ -71,29 +71,29 @@ public static String getTimeStr(final long millis) { long seconds = millis - TimeUnit.MINUTES.toMillis( TimeUnit.MILLISECONDS.toMinutes(millis)); b.append(secondFormat.format(seconds/1000.0) + "s"); - - return b.toString(); + + return b.toString(); } - + List svgLines = new LinkedList<>(); - + private final int addOffsetX(int x) { int xOff = x + X_BASE; X_MAX = Math.max(X_MAX, xOff); return xOff; } - + private final int addOffsetY(int y) { int yOff = y + Y_BASE; Y_MAX = Math.max(Y_MAX, yOff); return yOff; } - + private int scaleDown(int len) { return Math.round((len * 1.0f / MAX_DAG_RUNTIME) * SCREEN_WIDTH); } - - private void addRectStr(int x, int width, int y, int height, + + private void addRectStr(int x, int width, int y, int height, String fillColor, String borderColor, float opacity, String title) { String rectStyle = "stroke: " + borderColor + "; fill: " + fillColor + "; opacity: " + opacity; String rectStr = "" + " " + title +"" + " "; - svgLines.add(rectStr); + svgLines.add(rectStr); } - + private void addTextStr(int x, int y, String text, String anchor, int size, String title, boolean italic) { String textStyle = "text-anchor: " + anchor + "; font-style: " + (italic?"italic":"normal") + "; font-size: " + size + "px;"; @@ -118,7 +118,7 @@ private void addTextStr(int x, int y, String text, String anchor, int size, Stri + ""; svgLines.add(textStr); } - + private void addLineStr(int x1, int y1, int x2, int y2, String color, String title, int width) { String style = "stroke: " + color + "; stroke-width:" + width; String str = ""; svgLines.add(str); } - + public void drawStep(CriticalPathStep step, long dagStartTime, int yOffset) { if (step.getType() != EntityType.ATTEMPT) { // draw initial vertex or final commit overhead StringBuilder title = new StringBuilder(); String text = null; if (step.getType() == EntityType.VERTEX_INIT) { - String vertex = step.getAttempt().getTaskInfo().getVertexInfo().getVertexName(); + String vertex = 
step.getAttempt().getTaskInfo().getVertexInfo().getVertexName(); text = vertex + " : Init"; title.append(text).append(TITLE_BR); } else { @@ -165,9 +165,9 @@ public void drawStep(CriticalPathStep step, long dagStartTime, int yOffset) { int startCriticalTimeInterval = (int) (step.getStartCriticalTime() - dagStartTime); int stopCriticalTimeInterval = (int) (step.getStopCriticalTime() - dagStartTime); int creationTimeInterval = (int) (attempt.getCreationTime() - dagStartTime); - int allocationTimeInterval = attempt.getAllocationTime() > 0 ? + int allocationTimeInterval = attempt.getAllocationTime() > 0 ? (int) (attempt.getAllocationTime() - dagStartTime) : 0; - int launchTimeInterval = attempt.getStartTime() > 0 ? + int launchTimeInterval = attempt.getStartTime() > 0 ? (int) (attempt.getStartTime() - dagStartTime) : 0; int finishTimeInterval = (int) (attempt.getFinishTime() - dagStartTime); LOG.debug(attempt.getTaskAttemptId() + " " + creationTimeInterval + " " @@ -178,7 +178,7 @@ public void drawStep(CriticalPathStep step, long dagStartTime, int yOffset) { title.append("Critical Path Dependency: " + step.getReason()).append(TITLE_BR); title.append("Completion Status: " + attempt.getDetailedStatus()).append(TITLE_BR); title.append( - "Critical Time Contribution: " + + "Critical Time Contribution: " + getTimeStr(step.getStopCriticalTime() - step.getStartCriticalTime())).append(TITLE_BR); title.append("Critical start at: " + getTimeStr(startCriticalTimeInterval)).append(TITLE_BR); title.append("Critical stop at: " + getTimeStr(stopCriticalTimeInterval)).append(TITLE_BR); @@ -201,29 +201,29 @@ public void drawStep(CriticalPathStep step, long dagStartTime, int yOffset) { if (launchTimeInterval > 0) { addRectStr(allocationTimeInterval, launchTimeInterval - allocationTimeInterval, yOffset * STEP_GAP, STEP_GAP, LAUNCH_OVERHEAD_COLOR, BORDER_COLOR, RECT_OPACITY, - titleStr); + titleStr); addRectStr(launchTimeInterval, finishTimeInterval - launchTimeInterval, yOffset * STEP_GAP, STEP_GAP, RUNTIME_COLOR, BORDER_COLOR, RECT_OPACITY, titleStr); } else { // no launch - so allocate to finish drawn - ended while launching addRectStr(allocationTimeInterval, finishTimeInterval - allocationTimeInterval, yOffset * STEP_GAP, - STEP_GAP, LAUNCH_OVERHEAD_COLOR, BORDER_COLOR, RECT_OPACITY, titleStr); + STEP_GAP, LAUNCH_OVERHEAD_COLOR, BORDER_COLOR, RECT_OPACITY, titleStr); } } else { // no allocation - so create to finish drawn - ended while allocating addRectStr(creationTimeInterval, finishTimeInterval - creationTimeInterval, yOffset * STEP_GAP, - STEP_GAP, ALLOCATION_OVERHEAD_COLOR, BORDER_COLOR, RECT_OPACITY, titleStr); + STEP_GAP, ALLOCATION_OVERHEAD_COLOR, BORDER_COLOR, RECT_OPACITY, titleStr); } addTextStr((finishTimeInterval + creationTimeInterval) / 2, - (yOffset * STEP_GAP + STEP_GAP / 2), attempt.getShortName(), "middle", TEXT_SIZE, + (yOffset * STEP_GAP + STEP_GAP / 2), attempt.getShortName(), "middle", TEXT_SIZE, titleStr, !attempt.isSucceeded()); } } private void drawCritical(DagInfo dagInfo, List criticalPath) { long dagStartTime = dagInfo.getStartTime(); - int dagStartTimeInterval = 0; // this is 0 since we are offseting from the dag start time + int dagStartTimeInterval = 0; // this is 0 since we are offsetting from the dag start time int dagFinishTimeInterval = (int) (dagInfo.getFinishTime() - dagStartTime); if (dagInfo.getFinishTime() <= 0) { // AM crashed. 
no dag finish time written @@ -231,13 +231,13 @@ private void drawCritical(DagInfo dagInfo, List criticalPath) - dagStartTime); } MAX_DAG_RUNTIME = dagFinishTimeInterval; - + // draw grid addLineStr(dagStartTimeInterval, 0, dagFinishTimeInterval, 0, BORDER_COLOR, "", TICK); int yGrid = (criticalPath.size() + 2)*STEP_GAP; for (int i=0; i<11; ++i) { int x = Math.round(((dagFinishTimeInterval - dagStartTimeInterval)/10.0f)*i); - addLineStr(x, 0, x, yGrid, BORDER_COLOR, "", TICK); + addLineStr(x, 0, x, yGrid, BORDER_COLOR, "", TICK); addTextStr(x, 0, getTimeStr(x), "left", TEXT_SIZE, "", false); } addLineStr(dagStartTimeInterval, yGrid, dagFinishTimeInterval, yGrid, BORDER_COLOR, "", TICK); @@ -247,21 +247,21 @@ private void drawCritical(DagInfo dagInfo, List criticalPath) // draw steps for (int i=1; i<=criticalPath.size(); ++i) { - CriticalPathStep step = criticalPath.get(i-1); - drawStep(step, dagStartTime, i); + CriticalPathStep step = criticalPath.get(i-1); + drawStep(step, dagStartTime, i); } - + // draw critical path on top for (int i=1; i<=criticalPath.size(); ++i) { - CriticalPathStep step = criticalPath.get(i-1); - boolean isLast = i == criticalPath.size(); - + CriticalPathStep step = criticalPath.get(i-1); + boolean isLast = i == criticalPath.size(); + // draw critical path for step int startCriticalTimeInterval = (int) (step.getStartCriticalTime() - dagStartTime); int stopCriticalTimeInterval = (int) (step.getStopCriticalTime() - dagStartTime); addLineStr(startCriticalTimeInterval, (i + 1) * STEP_GAP, stopCriticalTimeInterval, (i + 1) * STEP_GAP, CRITICAL_COLOR, "Critical Time " + step.getAttempt().getShortName(), TICK*5); - + if (isLast) { // last step. add commit overhead int stepStopCriticalTimeInterval = (int) (step.getStopCriticalTime() - dagStartTime); @@ -274,12 +274,12 @@ private void drawCritical(DagInfo dagInfo, List criticalPath) (i + 2) * STEP_GAP, CRITICAL_COLOR, "Critical Time " + step.getAttempt().getShortName(), TICK*5); } } - + // draw legend int legendX = 0; int legendY = (criticalPath.size() + 2) * STEP_GAP; int legendWidth = dagFinishTimeInterval/5; - + addRectStr(legendX, legendWidth, legendY, STEP_GAP/2, VERTEX_INIT_COMMIT_COLOR, BORDER_COLOR, RECT_OPACITY, ""); addTextStr(legendX, legendY + STEP_GAP/3, "Vertex Init/Commit Overhead", "left", TEXT_SIZE, "", false); legendY += STEP_GAP/2; @@ -291,17 +291,17 @@ private void drawCritical(DagInfo dagInfo, List criticalPath) legendY += STEP_GAP/2; addRectStr(legendX, legendWidth, legendY, STEP_GAP/2, RUNTIME_COLOR, BORDER_COLOR, RECT_OPACITY, ""); addTextStr(legendX, legendY + STEP_GAP/3, "Task Execution Time", "left", TEXT_SIZE, "", false); - + Y_MAX += Y_BASE*2; X_MAX += X_BASE*2; } - - public void saveCriticalPathAsSVG(DagInfo dagInfo, + + public void saveCriticalPathAsSVG(DagInfo dagInfo, String fileName, List criticalPath) { drawCritical(dagInfo, criticalPath); saveFileStr(fileName); } - + private void saveFileStr(String fileName) { String header = " " + " org.apache.tez tez-tools - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-perf-analyzer pom @@ -29,9 +29,7 @@ hadoop27 - - !skipATS - + false job-analyzer @@ -40,7 +38,7 @@ hadoop28 - false + true job-analyzer diff --git a/tez-tools/pom.xml b/tez-tools/pom.xml index d23606f5aa..8811d80687 100644 --- a/tez-tools/pom.xml +++ b/tez-tools/pom.xml @@ -21,7 +21,7 @@ org.apache.tez tez - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-tools pom diff --git a/tez-tools/swimlanes/swimlane.py b/tez-tools/swimlanes/swimlane.py index bbd54df46d..11976daab9 100644 --- 
a/tez-tools/swimlanes/swimlane.py +++ b/tez-tools/swimlanes/swimlane.py @@ -195,6 +195,7 @@ def main(argv): svg.text(marginRight+xdomain(percentX), y+marginTop+12, "%d%% (%0.1fs)" % (int(fraction*100), (percentX - dag.start)/1000.0), style="font-size:12px; text-anchor: middle") out.write(svg.flush()) out.close() + print("Output svg is written into: " + str(out)) if __name__ == "__main__": sys.exit(main(sys.argv[1:])) diff --git a/tez-tools/swimlanes/yarn-swimlanes.sh b/tez-tools/swimlanes/yarn-swimlanes.sh old mode 100644 new mode 100755 index df4d071a7c..02465b0129 --- a/tez-tools/swimlanes/yarn-swimlanes.sh +++ b/tez-tools/swimlanes/yarn-swimlanes.sh @@ -19,10 +19,17 @@ set -e APPID=$1 - -YARN=$(which yarn); TMP=$(mktemp) +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +if [[ -f $APPID ]]; then + echo "Reading yarn logs from local file: $APPID" + cat "$APPID" | grep HISTORY > "$TMP" +else + YARN=$(which yarn); + echo "Fetching yarn logs for $APPID" + $YARN logs -applicationId "$APPID" | grep HISTORY > "$TMP" +fi +echo "History was written into $TMP" -echo "Fetching yarn logs for $APPID" -$YARN logs -applicationId $APPID | grep HISTORY > $TMP -python swimlane.py -o $APPID.svg $TMP +python "$DIR/swimlane.py" -o "$APPID.svg" "$TMP" \ No newline at end of file diff --git a/tez-tools/tez-javadoc-tools/findbugs-exclude.xml b/tez-tools/tez-javadoc-tools/findbugs-exclude.xml index ad8f6e4ce4..d21aa36315 100644 --- a/tez-tools/tez-javadoc-tools/findbugs-exclude.xml +++ b/tez-tools/tez-javadoc-tools/findbugs-exclude.xml @@ -13,6 +13,18 @@ --> + + + + + + + + + + + + diff --git a/tez-tools/tez-javadoc-tools/pom.xml b/tez-tools/tez-javadoc-tools/pom.xml index 4ef229e0b1..4ef89b6918 100644 --- a/tez-tools/tez-javadoc-tools/pom.xml +++ b/tez-tools/tez-javadoc-tools/pom.xml @@ -20,7 +20,7 @@ org.apache.tez tez-tools - 0.9.1-SNAPSHOT + 0.10.5-SNAPSHOT tez-javadoc-tools @@ -78,6 +78,62 @@ + + + java8-16 + + [1.8,16] + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + generate-sources + + add-source + + + + src/main/java-8-16 + + + + + + + + + + + java17 + + [17,) + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + generate-sources + + add-source + + + + src/main/java-17 + + + + + + + + @@ -94,6 +150,12 @@ true + + maven-javadoc-plugin + + org.apache.tez.tools.javadoc.doclet + + diff --git a/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/doclet/ConfigStandardDoclet.java b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/doclet/ConfigStandardDoclet.java new file mode 100644 index 0000000000..bd321475c4 --- /dev/null +++ b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/doclet/ConfigStandardDoclet.java @@ -0,0 +1,291 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.tools.javadoc.doclet; + +import com.sun.source.util.DocTrees; +import jdk.javadoc.doclet.Doclet; +import jdk.javadoc.doclet.DocletEnvironment; +import jdk.javadoc.doclet.Reporter; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.tez.common.annotation.ConfigurationClass; +import org.apache.tez.common.annotation.ConfigurationProperty; +import org.apache.tez.tools.javadoc.model.Config; +import org.apache.tez.tools.javadoc.model.ConfigProperty; +import org.apache.tez.tools.javadoc.util.HtmlWriter; +import org.apache.tez.tools.javadoc.util.XmlWriter; + +import javax.lang.model.SourceVersion; +import javax.lang.model.element.AnnotationMirror; +import javax.lang.model.element.AnnotationValue; +import javax.lang.model.element.Element; +import javax.lang.model.element.ElementKind; +import javax.lang.model.element.ExecutableElement; +import javax.lang.model.element.Modifier; +import javax.lang.model.element.TypeElement; +import javax.lang.model.element.VariableElement; +import javax.tools.Diagnostic; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; + +public class ConfigStandardDoclet implements Doclet { + + private static boolean debugMode = false; + + private Reporter reporter; + + private static String stripQuotes(String s) { + if (s.charAt(0) == '"' && s.charAt(s.length() - 1) == '"') { + return s.substring(1, s.length() - 1); + } + return s; + } + + @Override + public void init(Locale locale, Reporter reporter) { + this.reporter = reporter; + } + + @Override + public String getName() { + return "Tez"; + } + + @Override + public SourceVersion getSupportedSourceVersion() { + return SourceVersion.RELEASE_9; + } + + private void logMessage(String message) { + if (!debugMode) { + return; + } + reporter.print(Diagnostic.Kind.NOTE, message); + } + + @Override + public boolean run(DocletEnvironment docEnv) { + logMessage("Running doclet " + ConfigStandardDoclet.class.getSimpleName()); + DocTrees docTrees = docEnv.getDocTrees(); + for (Element element : docEnv.getIncludedElements()) { + if (element.getKind().equals(ElementKind.CLASS) && element instanceof TypeElement) { + processDoc(docTrees, (TypeElement) element); + } + } + + return true; + } + + private void processDoc(DocTrees docTrees, TypeElement doc) { + logMessage("Parsing : " + doc); + if (!doc.getKind().equals(ElementKind.CLASS)) { + logMessage("Ignoring non-class: " + doc); + return; + } + + List annotations = doc.getAnnotationMirrors(); + boolean isConfigClass = false; + String templateName = null; + for (AnnotationMirror annotation : annotations) { + logMessage("Checking annotation: " + annotation.getAnnotationType()); + if (annotation.getAnnotationType().asElement().toString().equals( + ConfigurationClass.class.getName())) { + isConfigClass = true; + Map elementValues = annotation.getElementValues(); + for (Map.Entry element : elementValues.entrySet()) { + if (element.getKey().getSimpleName().toString().equals("templateFileName")) { + templateName = stripQuotes(element.getValue().getValue().toString()); + } + } + break; + } + } + + if (!isConfigClass) { + logMessage("Ignoring non-config class: " + doc); 
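+      // Hypothetical example for clarity: org.apache.tez.dag.api.TezConfiguration is annotated with
+      // @ConfigurationClass(templateFileName = "tez-default-template.xml") and would be processed;
+      // classes lacking @ConfigurationClass bail out here.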
+      return;
+    }
+
+    logMessage("Processing config class: " + doc);
+    Config config = new Config(doc.getSimpleName().toString(), templateName);
+    Map<String, ConfigProperty> configProperties = config.getConfigProperties();
+
+    processElements(docTrees, doc, configProperties);
+
+    HtmlWriter writer = new HtmlWriter();
+    try {
+      writer.write(config);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+
+    XmlWriter xmlWriter = new XmlWriter();
+    try {
+      xmlWriter.write(config);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private void processElements(DocTrees docTrees, TypeElement doc,
+      Map<String, ConfigProperty> configProperties) {
+    List<? extends Element> elements = doc.getEnclosedElements();
+    for (Element f : elements) {
+      if (!(f instanceof VariableElement)) {
+        continue;
+      }
+
+      if (!f.getKind().equals(ElementKind.FIELD)) {
+        continue;
+      }
+
+      VariableElement field = (VariableElement) f;
+
+      if (field.getModifiers().contains(Modifier.PRIVATE)) {
+        logMessage("Skipping private field: " + field);
+        continue;
+      }
+      // config keys are public static final constants; anything non-static is not a config entry
+      if (!field.getModifiers().contains(Modifier.STATIC)) {
+        logMessage("Skipping non-static field: " + field);
+        continue;
+      }
+
+      String fieldName = field.getSimpleName().toString();
+      if (fieldName.endsWith("_PREFIX")) {
+        logMessage("Skipping non-config prefix constant field: " + field);
+        continue;
+      }
+      if (fieldName.equals("TEZ_SITE_XML")) {
+        logMessage("Skipping constant field: " + field);
+        continue;
+      }
+
+      if (fieldName.endsWith("_DEFAULT")) {
+
+        String name = fieldName.substring(0, fieldName.lastIndexOf("_DEFAULT"));
+        if (!configProperties.containsKey(name)) {
+          configProperties.put(name, new ConfigProperty());
+        }
+        ConfigProperty configProperty = configProperties.get(name);
+        Object constantValue = field.getConstantValue();
+        if (constantValue == null) {
+          // defaults that are not compile-time constants have no constant value here
+          logMessage("Got null constant value"
+              + ", name=" + name
+              + ", field=" + fieldName);
+        } else {
+          configProperty.setDefaultValue(constantValue.toString());
+        }
+        configProperty.setInferredType(field.asType().toString());
+
+        if (name.equals("TEZ_AM_STAGING_DIR") && configProperty.getDefaultValue() != null) {
+          String defaultValue = configProperty.getDefaultValue();
+          defaultValue = defaultValue.replace(System.getProperty("user.name"), "${user.name}");
+          configProperty.setDefaultValue(defaultValue);
+        }
+
+        continue;
+      }
+
+      if (!configProperties.containsKey(fieldName)) {
+        configProperties.put(fieldName, new ConfigProperty());
+      }
+      ConfigProperty configProperty = configProperties.get(fieldName);
+      configProperty.setPropertyName(field.getConstantValue().toString());
+
+      List<? extends AnnotationMirror> annotationDescs = field.getAnnotationMirrors();
+
+      for (AnnotationMirror annotationDesc : annotationDescs) {
+        String elementFqName = annotationDesc.getAnnotationType().asElement().toString();
+        if (elementFqName.equals(Private.class.getCanonicalName())) {
+          configProperty.setPrivate(true);
+        }
+        if (elementFqName.equals(Unstable.class.getCanonicalName())) {
+          configProperty.setUnstable(true);
+        }
+        if (elementFqName.equals(Evolving.class.getCanonicalName())) {
+          configProperty.setEvolving(true);
+        }
+        if (elementFqName.equals(ConfigurationProperty.class.getCanonicalName())) {
+          configProperty.setValidConfigProp(true);
+
+          for (Map.Entry<? extends ExecutableElement, ? extends AnnotationValue> element
+              : annotationDesc.getElementValues().entrySet()) {
+            if (element.getKey().getSimpleName().toString().equals("type")) {
+              configProperty.setType(stripQuotes(element.getValue().getValue().toString()));
+            } else {
+              logMessage("Unhandled 
annotation property: " + element.getKey().getSimpleName()); + } + } + } + } + configProperty.setDescription(docTrees.getDocCommentTree(field).getFullBody().toString()); + } + } + + @Override + public Set getSupportedOptions() { + Option[] options = { + new Option() { + private final List someOption = Arrays.asList( + "-debug", + "--debug" + ); + + @Override + public int getArgumentCount() { + return 0; + } + + @Override + public String getDescription() { + return "Debug mode"; + } + + @Override + public Option.Kind getKind() { + return Kind.STANDARD; + } + + @Override + public List getNames() { + return someOption; + } + + @Override + public String getParameters() { + return ""; + } + + @Override + public boolean process(String opt, List arguments) { + debugMode = true; + return true; + } + } + }; + return new HashSet<>(Arrays.asList(options)); + } +} diff --git a/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/doclet/package-info.java b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/doclet/package-info.java new file mode 100644 index 0000000000..190ae4622e --- /dev/null +++ b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/doclet/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +@Private +package org.apache.tez.tools.javadoc.doclet; + +import org.apache.hadoop.classification.InterfaceAudience.Private; \ No newline at end of file diff --git a/tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/model/ConfigProperty.java b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/model/Config.java similarity index 53% rename from tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/model/ConfigProperty.java rename to tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/model/Config.java index 89490c4a51..d5121aaad7 100644 --- a/tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/model/ConfigProperty.java +++ b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/model/Config.java @@ -18,30 +18,35 @@ package org.apache.tez.tools.javadoc.model; -public class ConfigProperty { - - public String propertyName; - public String defaultValue; - public String description; - public String type = "string"; - public boolean isPrivate = false; - public boolean isUnstable = false; - public boolean isEvolving = false; - public boolean isValidConfigProp = false; - public String[] validValues; - public String inferredType; - - @Override - public String toString() { - return "name=" + propertyName - + ", defaultValue=" + defaultValue - + ", description=" + description - + ", type=" + type - + ", inferredType=" + inferredType - + ", private=" + isPrivate - + ", validValues=" + (validValues == null ? "null" : validValues) - + ", isConfigProp=" + isValidConfigProp; +import java.util.Map; +import java.util.TreeMap; + +public class Config { + + private final String templateName; + private final String configName; + private Map configProperties; + + public Config(String configName, String templateName) { + this.configName = configName; + this.templateName = templateName; + this.setConfigProperties(new TreeMap()); } -} + public String getTemplateName() { + return templateName; + } + public String getConfigName() { + return configName; + } + + public Map getConfigProperties() { + return configProperties; + } + + public void setConfigProperties( + Map configProperties) { + this.configProperties = configProperties; + } +} diff --git a/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/model/ConfigProperty.java b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/model/ConfigProperty.java new file mode 100644 index 0000000000..27208de55b --- /dev/null +++ b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/model/ConfigProperty.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tez.tools.javadoc.model; + +public class ConfigProperty { + + private String propertyName; + private String defaultValue; + private String description; + private String type = "string"; + private boolean isPrivate = false; + private boolean isUnstable = false; + private boolean isEvolving = false; + private boolean isValidConfigProp = false; + private String inferredType; + + public String getPropertyName() { + return propertyName; + } + + public void setPropertyName(String propertyName) { + this.propertyName = propertyName; + } + + public String getDefaultValue() { + return defaultValue; + } + + public void setDefaultValue(String defaultValue) { + this.defaultValue = defaultValue; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public boolean isPrivate() { + return isPrivate; + } + + public void setPrivate(boolean aPrivate) { + isPrivate = aPrivate; + } + + public boolean isUnstable() { + return isUnstable; + } + + public void setUnstable(boolean unstable) { + isUnstable = unstable; + } + + public boolean isEvolving() { + return isEvolving; + } + + public void setEvolving(boolean evolving) { + isEvolving = evolving; + } + + public boolean isValidConfigProp() { + return isValidConfigProp; + } + + public void setValidConfigProp(boolean validConfigProp) { + isValidConfigProp = validConfigProp; + } + + public String getInferredType() { + return inferredType; + } + + public void setInferredType(String inferredType) { + this.inferredType = inferredType; + } + + @Override + public String toString() { + return "name=" + getPropertyName() + + ", defaultValue=" + getDefaultValue() + + ", description=" + getDescription() + + ", type=" + getType() + + ", inferredType=" + getInferredType() + + ", private=" + isPrivate() + + ", isConfigProp=" + isValidConfigProp(); + } +} diff --git a/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/model/package-info.java b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/model/package-info.java new file mode 100644 index 0000000000..76acd05258 --- /dev/null +++ b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/model/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +@Private +package org.apache.tez.tools.javadoc.model; + +import org.apache.hadoop.classification.InterfaceAudience.Private; \ No newline at end of file diff --git a/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/HtmlWriter.java b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/HtmlWriter.java new file mode 100644 index 0000000000..9c512ee7f7 --- /dev/null +++ b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/HtmlWriter.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.tools.javadoc.util; + +import org.apache.tez.tools.javadoc.model.Config; +import org.apache.tez.tools.javadoc.model.ConfigProperty; + +import java.io.*; +import java.nio.charset.StandardCharsets; + +public class HtmlWriter extends Writer { + + private static final String DEFAULT_STYLESHEET = "default-stylesheet.css"; + + public void write(Config config) throws IOException { + PrintWriter out = null; + + if (config.getConfigName() == null || config.getConfigName().isEmpty()) { + throw new RuntimeException("Config Name is null or empty"); + } + + try { + File file = new File(config.getConfigName() + ".html"); + out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8)); + + out.println(""); + out.println(""); + + out.println(""); + + out.println(""); + out.println(""); + out.println("" + config.getConfigName() + ""); + out.println(""); + + out.println(""); + + out.println(""); + + out.println("
<html>");
+      out.println("<head>");
+      out.println("<link rel=\"stylesheet\" type=\"text/css\" href=\"" + DEFAULT_STYLESHEET + "\"/>");
+      out.println("<title>" + config.getConfigName() + "</title>");
+      out.println("</head>");
+
+      out.println("<body>");
+
+      out.println("<h2>" + config.getConfigName() + "</h2>");
+
+      out.println("<table>");
+      out.println("<tr>");
+      out.println("<th>" + "Property Name" + "</th>");
+      out.println("<th>" + "Default Value" + "</th>");
+      out.println("<th>" + "Description" + "</th>");
+      out.println("<th>" + "Type" + "</th>");
+      // out.println("<th>" + "Valid Values" + "</th>");
+      out.println("<th>" + "Is Private?" + "</th>");
+      out.println("<th>" + "Is Unstable?" + "</th>");
+      out.println("<th>" + "Is Evolving?" + "</th>");
+      out.println("</tr>");
+
+      for (ConfigProperty configProperty : config.getConfigProperties().values()) {
+        if (!isValidConfigProperty(configProperty)) {
+          continue;
+        }
+
+        String altClass = "";
+        if (configProperty.isPrivate()) {
+          altClass = "class=\"tr_private\"";
+        } else if (configProperty.isEvolving() || configProperty.isUnstable()) {
+          altClass = "class=\"tr_evolve_unstable\"";
+        }
+
+        out.println("<tr " + altClass + ">");
+        out.println("<td>" + configProperty.getPropertyName() + "</td>");
+        out.println("<td>" + configProperty.getDefaultValue() + "</td>");
+        out.println("<td>" + configProperty.getDescription() + "</td>");
+        out.println("<td>" + configProperty.getType() + "</td>");
+
+        out.println(
+            "<td>" + configProperty.isPrivate() + "</td>");
+        out.println(
+            "<td>" + configProperty.isEvolving() + "</td>");
+        out.println(
+            "<td>" + configProperty.isUnstable() + "</td>");
+        out.println("</tr>");
+      }
+
+      out.println("</table>");
+
+      out.println("</body>");
+      out.println("</html>");
+    } finally {
+      if (out != null) {
+        out.close();
+      }
+    }
+  }
+}
diff --git a/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/Writer.java b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/Writer.java
new file mode 100644
index 0000000000..523f8cae9f
--- /dev/null
+++ b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/Writer.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.tools.javadoc.util;
+
+import org.apache.tez.tools.javadoc.model.Config;
+import org.apache.tez.tools.javadoc.model.ConfigProperty;
+
+import java.io.IOException;
+
+public abstract class Writer {
+
+  public abstract void write(Config config) throws IOException;
+
+  public boolean isValidConfigProperty(ConfigProperty configProperty) {
+    if (!configProperty.isValidConfigProp()) {
+      return false;
+    }
+    return configProperty.getPropertyName() != null && !configProperty.getPropertyName().isEmpty();
+  }
+}
diff --git a/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/XmlWriter.java b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/XmlWriter.java
new file mode 100644
index 0000000000..591d82bc90
--- /dev/null
+++ b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/XmlWriter.java
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.tools.javadoc.util;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.tez.tools.javadoc.model.Config;
+import org.apache.tez.tools.javadoc.model.ConfigProperty;
+
+import com.google.common.io.ByteStreams;
+
+public class XmlWriter extends Writer {
+
+  public void write(Config config) throws IOException {
+    PrintWriter out = null;
+
+    if (config.getConfigName() == null || config.getConfigName().isEmpty()) {
+      throw new RuntimeException("Config Name is null or empty");
+    }
+
+    String fileName = config.getTemplateName() == null ||
+        config.getTemplateName().isEmpty() ? config.getConfigName() : config.getTemplateName();
+    if (!fileName.endsWith(".xml")) {
+      fileName += ".xml";
+    }
+
+    try {
+      File file = new File(fileName);
+      writeApacheHeader(file);
+
+      out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file, true), StandardCharsets.UTF_8));
+
+      out.println("<?xml version=\"1.0\"?>");
+      out.println();
+      out.println("<?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>");
+      out.println();
+      out.println("<configuration>");
+
+      for (ConfigProperty configProperty : config.getConfigProperties().values()) {
+        if (!isValidConfigProperty(configProperty)) {
+          continue;
+        }
+        out.println();
+        out.println("  <property>");
+        out.println("    <name>" + configProperty.getPropertyName() + "</name>");
+        if (configProperty.getDefaultValue() != null && !configProperty.getDefaultValue().isEmpty()) {
+          out.println("    <value>" + configProperty.getDefaultValue() + "</value>");
+        }
+        if (configProperty.getDescription() != null && !configProperty.getDescription().isEmpty()) {
+          out.println("    <description>"
+              + StringEscapeUtils.escapeXml(configProperty.getDescription()) + "</description>");
+        }
+        if (configProperty.getType() != null && !configProperty.getType().isEmpty()) {
+          out.println("    <type>" + configProperty.getType() + "</type>");
+        }
+        if (configProperty.isUnstable()) {
+          out.println("    <unstable>true</unstable>");
+        }
+        if (configProperty.isEvolving()) {
+          out.println("    <evolving>true</evolving>");
+        }
+        if (configProperty.isPrivate()) {
+          out.println("    <private>true</private>");
+        }
+        out.println("  </property>");
+      }
+
+      out.println();
+      out.println("</configuration>");
+    } finally {
+      if (out != null) {
+        out.close();
+      }
+    }
+  }
+
+  private void writeApacheHeader(File file) throws IOException {
+    try (InputStream in = this.getClass().getClassLoader().getResourceAsStream("apache-licence.xml.header");
+        OutputStream out = Files.newOutputStream(file.toPath())) {
+      ByteStreams.copy(in, out);
+    }
+  }
+}
diff --git a/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/package-info.java b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/package-info.java
new file mode 100644
index 0000000000..d458619729
--- /dev/null
+++ b/tez-tools/tez-javadoc-tools/src/main/java-17/org/apache/tez/tools/javadoc/util/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@Private +package org.apache.tez.tools.javadoc.util; + +import org.apache.hadoop.classification.InterfaceAudience.Private; \ No newline at end of file diff --git a/tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/doclet/ConfigStandardDoclet.java b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/doclet/ConfigStandardDoclet.java similarity index 80% rename from tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/doclet/ConfigStandardDoclet.java rename to tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/doclet/ConfigStandardDoclet.java index 6cc9ced865..a67b9dbcde 100644 --- a/tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/doclet/ConfigStandardDoclet.java +++ b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/doclet/ConfigStandardDoclet.java @@ -7,7 +7,7 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -19,9 +19,17 @@ package org.apache.tez.tools.javadoc.doclet; import java.io.IOException; -import java.util.HashMap; import java.util.Map; +import com.sun.javadoc.AnnotationDesc; +import com.sun.javadoc.AnnotationDesc.ElementValuePair; +import com.sun.javadoc.ClassDoc; +import com.sun.javadoc.DocErrorReporter; +import com.sun.javadoc.FieldDoc; +import com.sun.javadoc.LanguageVersion; +import com.sun.javadoc.RootDoc; +import com.sun.tools.doclets.standard.Standard; + import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Unstable; @@ -32,20 +40,14 @@ import org.apache.tez.tools.javadoc.util.HtmlWriter; import org.apache.tez.tools.javadoc.util.XmlWriter; -import com.sun.javadoc.AnnotationDesc; -import com.sun.javadoc.AnnotationDesc.ElementValuePair; -import com.sun.javadoc.ClassDoc; -import com.sun.javadoc.DocErrorReporter; -import com.sun.javadoc.FieldDoc; -import com.sun.javadoc.LanguageVersion; -import com.sun.javadoc.RootDoc; -import com.sun.tools.doclets.standard.Standard; - -public class ConfigStandardDoclet { +public final class ConfigStandardDoclet { private static final String DEBUG_SWITCH = "-debug"; private static boolean debugMode = false; + private ConfigStandardDoclet() { + } + public static LanguageVersion languageVersion() { return LanguageVersion.JAVA_1_5; } @@ -63,14 +65,15 @@ public static boolean start(RootDoc root) { for (String opt : opts) { if (opt.equals(DEBUG_SWITCH)) { debugMode = true; + break; } } } logMessage("Running doclet " + ConfigStandardDoclet.class.getSimpleName()); ClassDoc[] classes = root.classes(); - for (int i = 0; i < classes.length; ++i) { - processDoc(classes[i]); + for (ClassDoc aClass : classes) { + processDoc(aClass); } return true; @@ 
-107,7 +110,7 @@ private static void processDoc(ClassDoc doc) {
     logMessage("Processing config class: " + doc);
     Config config = new Config(doc.name(), templateName);
-    Map<String, ConfigProperty> configProperties = config.configProperties;
+    Map<String, ConfigProperty> configProperties = config.getConfigProperties();
 
     FieldDoc[] fields = doc.fields();
     for (FieldDoc field : fields) {
@@ -142,16 +145,16 @@ private static void processDoc(ClassDoc doc) {
             + ", name=" + name
             + ", field=" + field.name()
             + ", val=" + field.constantValueExpression());
-        configProperty.defaultValue = field.constantValueExpression();
+        configProperty.setDefaultValue(field.constantValueExpression());
       } else {
-        configProperty.defaultValue = field.constantValue().toString();
+        configProperty.setDefaultValue(field.constantValue().toString());
       }
-      configProperty.inferredType = field.type().simpleTypeName();
+      configProperty.setInferredType(field.type().simpleTypeName());
 
-      if (name.equals("TEZ_AM_STAGING_DIR") && configProperty.defaultValue != null) {
-        String defaultValue = configProperty.defaultValue;
+      if (name.equals("TEZ_AM_STAGING_DIR") && configProperty.getDefaultValue() != null) {
+        String defaultValue = configProperty.getDefaultValue();
         defaultValue = defaultValue.replace(System.getProperty("user.name"), "${user.name}");
-        configProperty.defaultValue = defaultValue;
+        configProperty.setDefaultValue(defaultValue);
       }
 
       continue;
@@ -162,7 +165,7 @@ private static void processDoc(ClassDoc doc) {
       configProperties.put(name, new ConfigProperty());
     }
     ConfigProperty configProperty = configProperties.get(name);
-    configProperty.propertyName = field.constantValue().toString();
+    configProperty.setPropertyName(field.constantValue().toString());
 
     AnnotationDesc[] annotationDescs = field.annotations();
 
@@ -170,55 +173,49 @@ private static void processDoc(ClassDoc doc) {
       if (annotationDesc.annotationType().qualifiedTypeName().equals(
           Private.class.getCanonicalName())) {
-        configProperty.isPrivate = true;
+        configProperty.setPrivate(true);
       }
       if (annotationDesc.annotationType().qualifiedTypeName().equals(
           Unstable.class.getCanonicalName())) {
-        configProperty.isUnstable = true;
+        configProperty.setUnstable(true);
       }
       if (annotationDesc.annotationType().qualifiedTypeName().equals(
           Evolving.class.getCanonicalName())) {
-        configProperty.isEvolving = true;
+        configProperty.setEvolving(true);
       }
       if (annotationDesc.annotationType().qualifiedTypeName().equals(
           ConfigurationProperty.class.getCanonicalName())) {
-        configProperty.isValidConfigProp = true;
+        configProperty.setValidConfigProp(true);
 
-        boolean foundType = false;
         for (ElementValuePair element : annotationDesc.elementValues()) {
           if (element.element().name().equals("type")) {
-            configProperty.type = stripQuotes(element.value().toString());
-            foundType = true;
+            configProperty.setType(stripQuotes(element.value().toString()));
          } else {
            logMessage("Unhandled annotation property: " + element.element().name());
          }
        }
      }
-    }
-
-    configProperty.description = field.commentText();
-  }
-
-  HtmlWriter writer = new HtmlWriter();
-  try {
-    writer.write(config);
-  } catch (IOException e) {
-    throw new RuntimeException(e);
-  }
+      HtmlWriter writer = new HtmlWriter();
+      try {
+        writer.write(config);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
 
-  XmlWriter xmlWriter = new XmlWriter();
-  try {
-    xmlWriter.write(config);
-  } catch (IOException e) {
-    throw new RuntimeException(e);
+      XmlWriter xmlWriter = new XmlWriter();
+      try {
+        xmlWriter.write(config);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
-  }
 private static String stripQuotes(String s) {
-    if (s.charAt(0) == '"' && s.charAt(s.length()-1) == '"') {
-      return s.substring(1, s.length()-1);
+    if (s.charAt(0) == '"' && s.charAt(s.length() - 1) == '"') {
+      return s.substring(1, s.length() - 1);
     }
     return s;
   }
@@ -227,7 +224,7 @@ public static int optionLength(String option) {
     return Standard.optionLength(option);
   }
 
-  public static boolean validOptions(String options[][], DocErrorReporter reporter) {
+  public static boolean validOptions(String[][] options, DocErrorReporter reporter) {
     return true;
   }
 }
diff --git a/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/doclet/package-info.java b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/doclet/package-info.java
new file mode 100644
index 0000000000..190ae4622e
--- /dev/null
+++ b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/doclet/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@Private
+package org.apache.tez.tools.javadoc.doclet;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
\ No newline at end of file
diff --git a/tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/model/Config.java b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/model/Config.java
similarity index 66%
rename from tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/model/Config.java
rename to tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/model/Config.java
index 604d48ac5e..0556a4e37a 100644
--- a/tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/model/Config.java
+++ b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/model/Config.java
@@ -23,18 +23,29 @@
 
 public class Config {
 
-  public final String templateName;
-  public final String configName;
-  public Map<String, ConfigProperty> configProperties;
+  private final String templateName;
+  private final String configName;
+  private Map<String, ConfigProperty> configProperties;
 
   public Config(String configName, String templateName) {
     this.configName = configName;
     this.templateName = templateName;
-    this.configProperties = new TreeMap<String, ConfigProperty>();
+    this.setConfigProperties(new TreeMap<String, ConfigProperty>());
   }
 
-  public Config() {
-    this(null, null);
+  public String getTemplateName() {
+    return templateName;
   }
 
+  public String getConfigName() {
+    return configName;
+  }
+
+  public Map<String, ConfigProperty> getConfigProperties() {
+    return configProperties;
+  }
+
+  public void setConfigProperties(Map<String, ConfigProperty> configProperties) {
+    this.configProperties = configProperties;
+  }
 }
diff --git a/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/model/ConfigProperty.java
b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/model/ConfigProperty.java new file mode 100644 index 0000000000..27208de55b --- /dev/null +++ b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/model/ConfigProperty.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.tools.javadoc.model; + +public class ConfigProperty { + + private String propertyName; + private String defaultValue; + private String description; + private String type = "string"; + private boolean isPrivate = false; + private boolean isUnstable = false; + private boolean isEvolving = false; + private boolean isValidConfigProp = false; + private String inferredType; + + public String getPropertyName() { + return propertyName; + } + + public void setPropertyName(String propertyName) { + this.propertyName = propertyName; + } + + public String getDefaultValue() { + return defaultValue; + } + + public void setDefaultValue(String defaultValue) { + this.defaultValue = defaultValue; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public boolean isPrivate() { + return isPrivate; + } + + public void setPrivate(boolean aPrivate) { + isPrivate = aPrivate; + } + + public boolean isUnstable() { + return isUnstable; + } + + public void setUnstable(boolean unstable) { + isUnstable = unstable; + } + + public boolean isEvolving() { + return isEvolving; + } + + public void setEvolving(boolean evolving) { + isEvolving = evolving; + } + + public boolean isValidConfigProp() { + return isValidConfigProp; + } + + public void setValidConfigProp(boolean validConfigProp) { + isValidConfigProp = validConfigProp; + } + + public String getInferredType() { + return inferredType; + } + + public void setInferredType(String inferredType) { + this.inferredType = inferredType; + } + + @Override + public String toString() { + return "name=" + getPropertyName() + + ", defaultValue=" + getDefaultValue() + + ", description=" + getDescription() + + ", type=" + getType() + + ", inferredType=" + getInferredType() + + ", private=" + isPrivate() + + ", isConfigProp=" + isValidConfigProp(); + } +} diff --git a/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/model/package-info.java b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/model/package-info.java new file mode 100644 index 0000000000..76acd05258 --- /dev/null +++ b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/model/package-info.java @@ -0,0 +1,22 @@ 
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@Private
+package org.apache.tez.tools.javadoc.model;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
\ No newline at end of file
diff --git a/tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/util/HtmlWriter.java b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/util/HtmlWriter.java
similarity index 76%
rename from tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/util/HtmlWriter.java
rename to tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/util/HtmlWriter.java
index 4b531e87e5..548908c8e1 100644
--- a/tez-tools/tez-javadoc-tools/src/main/java/org/apache/tez/tools/javadoc/util/HtmlWriter.java
+++ b/tez-tools/tez-javadoc-tools/src/main/java-8-16/org/apache/tez/tools/javadoc/util/HtmlWriter.java
@@ -19,13 +19,11 @@
 package org.apache.tez.tools.javadoc.util;
 
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 
-import org.apache.tez.dag.api.TezException;
 import org.apache.tez.tools.javadoc.model.Config;
 import org.apache.tez.tools.javadoc.model.ConfigProperty;
 
@@ -36,12 +34,12 @@ public class HtmlWriter extends Writer {
   public void write(Config config) throws IOException {
     PrintWriter out = null;
 
-    if (config.configName == null || config.configName.isEmpty()) {
+    if (config.getConfigName() == null || config.getConfigName().isEmpty()) {
       throw new RuntimeException("Config Name is null or empty");
     }
 
     try {
-      File file = new File(config.configName + ".html");
+      File file = new File(config.getConfigName() + ".html");
       out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"));
 
       out.println("<html>");
@@ -52,8 +50,7 @@ public void write(Config config) throws IOException {
       out.println("<head>");
       out.println("");
-      out.println("<title>" + config.configName + "</title>");
-//      out.println("");
+      out.println("<title>" + config.getConfigName() + "</title>");
       out.println("</head>");
       out.println("
diff --git a/tez-ui/src/main/webapp/app/templates/components/em-table.hbs b/tez-ui/src/main/webapp/app/templates/components/em-table.hbs
new file mode 100644
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/templates/components/em-table.hbs
+{{!--Header--}}
+<div class="table-header">
+  {{#each headerComponentNames as |componentName|}}
+    {{component componentName tableDefinition=_definition dataProcessor=_dataProcessor}}
+  {{/each}}
+</div>
+
+<div class="table-mid">
+  <div class="left-panel">
+    {{#if leftPanelComponentName}}
+      {{component leftPanelComponentName tableDefinition=_definition dataProcessor=_dataProcessor}}
+    {{/if}}
+  </div>
+
+  {{#if message}}
+    <div class="table-message">
+      {{message}}
+    </div>
+  {{else}}
+    {{!--Body--}}
+    {{#if _columns.left.length}}
+      <div class="table-body-left">
+        {{#each _columns.left as |column colIndex|}}
+          {{em-table-column
+            rows=_dataProcessor.processedRows
+            definition=column.definition
+            defaultWidth=column.width
+            tableDefinition=_definition
+            dataProcessor=_dataProcessor
+            index=colIndex
+          }}
+        {{/each}}
+      </div>
+    {{/if}}
+
+    <div class="table-scroll-body">
+      <div class="table-body">
+        {{#each _columns.center as |column colIndex|}}
+          {{em-table-column
+            rows=_dataProcessor.processedRows
+            definition=column.definition
+            defaultWidth=column.width
+            tableDefinition=_definition
+            dataProcessor=_dataProcessor
+            index=colIndex
+          }}
+        {{/each}}
+      </div>
+    </div>
+
+    {{#if _columns.right.length}}
+      <div class="table-body-right">
+        {{#each _columns.right as |column colIndex|}}
+          {{em-table-column
+            rows=_dataProcessor.processedRows
+            definition=column.definition
+            defaultWidth=column.width
+            tableDefinition=_definition
+            dataProcessor=_dataProcessor
+            index=colIndex
+          }}
+        {{/each}}
+      </div>
+    {{/if}}
+  {{/if}}
+
+  <div class="right-panel">
+    {{#if rightPanelComponentName}}
+      {{component rightPanelComponentName tableDefinition=_definition dataProcessor=_dataProcessor}}
+    {{/if}}
+  </div>
+</div>
+
+{{!--Footer--}}
+{{#if displayFooter}}
+  <div class="table-footer">
+  </div>
+{{/if}}
diff --git a/tez-ui/src/main/webapp/app/templates/components/stats-link.hbs b/tez-ui/src/main/webapp/app/templates/components/stats-link.hbs
index 0d91ae9b26..d8155eec61 100644
--- a/tez-ui/src/main/webapp/app/templates/components/stats-link.hbs
+++ b/tez-ui/src/main/webapp/app/templates/components/stats-link.hbs
@@ -17,9 +17,15 @@
 }}
 
 {{#if value}}
-  {{#link-to routeName (query-params searchText=searchText)}}
-    {{txt value type="number"}} {{_statsType}}
-  {{/link-to}}
+  {{#if searchText}}
+    {{#link-to routeName (query-params searchText=searchText)}}
+      {{txt value type="number"}}
+    {{/link-to}}
+  {{else}}
+    {{#link-to routeName}}
+      {{txt value type="number"}}
+    {{/link-to}}
+  {{/if}}
 {{else}}
   {{txt value type="number"}}
 {{/if}}
diff --git a/tez-ui/src/main/webapp/app/templates/dag/index.hbs b/tez-ui/src/main/webapp/app/templates/dag/index.hbs
index ad1e093a0a..ae148025ca 100644
--- a/tez-ui/src/main/webapp/app/templates/dag/index.hbs
+++ b/tez-ui/src/main/webapp/app/templates/dag/index.hbs
@@ -51,7 +51,7 @@
       <td>Status</td>
       <td>
-        {{em-table-status-cell content=model.status}}
+        {{em-table-status-cell content=model.finalStatus}}
       </td>
     </tr>
     <tr>
       <td>Progress</td>
diff --git a/tez-ui/src/main/webapp/app/templates/dag/index/index.hbs b/tez-ui/src/main/webapp/app/templates/dag/index/index.hbs
index e39b514e97..de425af2d7 100644
--- a/tez-ui/src/main/webapp/app/templates/dag/index/index.hbs
+++ b/tez-ui/src/main/webapp/app/templates/dag/index/index.hbs
@@ -25,22 +25,22 @@
+      <tr>
+        <td>Total Vertices</td>
+        <td>{{stats-link value=stats.totalVertices routeName="dag.vertices"}}</td>
+      </tr>
       <tr>
         <td>Succeeded Vertices</td>
         <td>{{stats-link value=stats.succeededVertices routeName="dag.vertices" statsType="SUCCEEDED"}}</td>
       </tr>
-      <tr>
-        <td>Total Vertices</td>
-        <td>{{stats.totalVertices}}</td>
+      <tr>
+        <td>Total Tasks</td>
+        <td>{{stats-link value=stats.totalTasks routeName="dag.tasks"}}</td>
       </tr>
       <tr>
         <td>Succeeded Tasks</td>
         <td>{{stats-link value=stats.succeededTasks routeName="dag.tasks" statsType="SUCCEEDED"}}</td>
       </tr>
-      <tr>
-        <td>Total Tasks</td>
-        <td>{{stats.totalTasks}}</td>
-      </tr>
       <tr>
         <td>Failed Tasks</td>
         <td>{{stats-link value=stats.failedTasks routeName="dag.tasks" statsType="FAILED"}}</td>
diff --git a/tez-ui/src/main/webapp/app/templates/vertex/index.hbs b/tez-ui/src/main/webapp/app/templates/vertex/index.hbs
index ff4f4fb7a7..ce083436df 100644
--- a/tez-ui/src/main/webapp/app/templates/vertex/index.hbs
+++ b/tez-ui/src/main/webapp/app/templates/vertex/index.hbs
@@ -68,7 +68,7 @@
       <td>Total Tasks</td>
       <td>
-        {{txt model.totalTasks type="number"}}
+        {{stats-link value=model.totalTasks routeName="vertex.tasks"}}
       </td>
     </tr>
     <tr>
       <td>Succeeded Tasks</td>
@@ -82,6 +82,14 @@
       <td>Killed Tasks</td>
       <td>{{stats-link value=model.killedTasks routeName="vertex.tasks" statsType="KILLED"}}</td>
     </tr>
+    <tr>
+      <td>Failed Task Attempts</td>
+      <td>{{stats-link value=model.failedTaskAttempts routeName="vertex.attempts" statsType="FAILED"}}</td>
+    </tr>
+    <tr>
+      <td>Killed Task Attempts</td>
+      <td>{{stats-link value=model.killedTaskAttempts routeName="vertex.attempts" statsType="KILLED"}}</td>
+    </tr>
     <tr>
       <td>First Task Start Time</td>
diff --git a/tez-ui/src/main/webapp/app/utils/column-definition.js b/tez-ui/src/main/webapp/app/utils/column-definition.js
new file mode 100644
index 0000000000..1316866ab1
--- /dev/null
+++ b/tez-ui/src/main/webapp/app/utils/column-definition.js
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Ember from 'ember'; + +import facetTypes from './facet-types'; + +function getContentAtPath(row) { + var contentPath = this.get('contentPath'); + + if(contentPath) { + return Ember.get(row, contentPath); + } + else { + throw new Error("contentPath not set!"); + } +} + +function returnEmptyString() { + return ""; +} + +var ColumnDefinition = Ember.Object.extend({ + id: "", + headerTitle: "Not Available!", + + classNames: [], + + cellComponentName: null, + + enableSearch: true, + enableSort: true, + enableColumnResize: true, + + width: null, + minWidth: "150px", + + contentPath: null, + observePath: false, + + cellDefinition: null, + + pin: "center", + + facetType: facetTypes.VALUES, + + beforeSort: null, + getCellContent: getContentAtPath, + getSearchValue: getContentAtPath, + getSortValue: getContentAtPath, + + init: function () { + if(!this.get("id")) { + throw new Error("ID is not set."); + } + }, +}); + +ColumnDefinition.make = function (rawDefinition) { + if(Array.isArray(rawDefinition)) { + return rawDefinition.map(function (def) { + return ColumnDefinition.create(def); + }); + } + else if(typeof rawDefinition === 'object') { + return ColumnDefinition.create(rawDefinition); + } + else { + throw new Error("rawDefinition must be an Array or an Object."); + } +}; + +ColumnDefinition.makeFromModel = function (ModelClass, columnOptions) { + var attributes = Ember.get(ModelClass, 'attributes'), + columns = []; + if(attributes) { + attributes.forEach(function (meta, name) { + var column = Ember.Object.create({ + id: name, + headerTitle: name.capitalize(), + contentPath: name, + }); + + if(columnOptions) { + column.setProperties(columnOptions); + } + + columns.push(column); + }); + + return ColumnDefinition.make(columns); + } + else { + throw new Error("Value passed is not a model class"); + } +}; + +ColumnDefinition.fillerColumn = ColumnDefinition.create({ + id: "fillerColumn", + headerTitle: "", + getCellContent: returnEmptyString, + getSearchValue: returnEmptyString, + getSortValue: returnEmptyString, + + enableSearch: false, + enableSort: false, + enableColumnResize: false, +}); + +export default ColumnDefinition; diff --git a/tez-ui/src/main/webapp/app/utils/counter-column-definition.js b/tez-ui/src/main/webapp/app/utils/counter-column-definition.js index d66e551eed..5590e10244 100644 --- a/tez-ui/src/main/webapp/app/utils/counter-column-definition.js +++ b/tez-ui/src/main/webapp/app/utils/counter-column-definition.js @@ -19,7 +19,7 @@ import Ember from 'ember'; import isIOCounter from '../utils/misc'; -import ColumnDefinition from 'em-table/utils/column-definition'; +import ColumnDefinition from './column-definition'; /* * Returns a counter value from for a row diff --git a/tez-ui/src/main/webapp/app/utils/data-processor.js b/tez-ui/src/main/webapp/app/utils/data-processor.js new file mode 100644 index 0000000000..07d31c09bf --- /dev/null +++ b/tez-ui/src/main/webapp/app/utils/data-processor.js @@ -0,0 +1,275 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Ember from 'ember'; + +import SQL from './sql'; + +/** + * Handles Sorting, Searching & Pagination + */ +export default Ember.Object.extend({ + isSorting: false, + isSearching: false, + + tableDefinition: null, + + sql: SQL.create(), + + rows: [], + _sortedRows: [], + _searchedRows: [], + _facetFilteredRows: [], + + _searchObserver: Ember.on("init", Ember.observer('tableDefinition.searchText', 'tableDefinition._actualSearchType', '_sortedRows.[]', function () { + Ember.run.once(this, "startSearch"); + })), + + _sortObserver: Ember.on("init", Ember.observer( + 'tableDefinition.sortColumnId', + 'tableDefinition.sortOrder', + 'rows.[]', function () { + Ember.run.once(this, "startSort"); + })), + + _facetedFilterObserver: Ember.on("init", Ember.observer('tableDefinition.facetConditions', '_searchedRows.[]', function () { + Ember.run.once(this, "startFacetedFilter"); + })), + + regexSearch: function (clause, rows, columns) { + var regex; + + try { + regex = new RegExp(clause, "i"); + } + catch(e) { + regex = new RegExp("", "i"); + } + + function checkRow(column) { + var value; + if(!column.get('enableSearch')) { + return false; + } + value = column.getSearchValue(this); + + if(typeof value === 'string') { + value = value.toLowerCase(); + return value.match(regex); + } + + return false; + } + + return rows.filter(function (row) { + return columns.some(checkRow, row); + }); + }, + + startSearch: function () { + var searchText = String(this.get('tableDefinition.searchText')), + rows = this.get('_sortedRows') || [], + columns = this.get('tableDefinition.columns'), + actualSearchType = this.get('tableDefinition._actualSearchType'), + that = this; + + if(searchText) { + this.set("isSearching", true); + + Ember.run.later(function () { + var result; + + switch(actualSearchType) { + case "SQL": + result = that.get("sql").search(searchText, rows, columns); + break; + + //case "Regex": Commenting as default will be called anyways + default: + result = that.regexSearch(searchText, rows, columns); + break; + } + + that.setProperties({ + _searchedRows: result, + isSearching: false + }); + }); + } + else { + this.set("_searchedRows", rows); + } + }, + + compareFunction: function (a, b){ + // Checking for undefined and null to handle some special cases in JavaScript comparison + // Eg: 1 > undefined = false & 1 < undefined = false + // "a1" > null = false & "a1" < null = false + if(a === undefined || a === null) { + return -1; + } + else if(b === undefined || b === null) { + return 1; + } + else if(a < b) { + return -1; + } + else if(a > b) { + return 1; + } + else { + return 0; + } + }, + + startSort: function () { + var rows = this.get('rows'), + tableDefinition = this.get('tableDefinition'), + sortColumnId = this.get('tableDefinition.sortColumnId'), + descending = this.get('tableDefinition.sortOrder') === 
'desc', + that = this, + column; + + if(tableDefinition) { + column = tableDefinition.get('columns').find(function (element) { + return element.get('id') === sortColumnId; + }); + } + + if(rows && Array.isArray(rows.content)) { + rows = rows.toArray(); + } + + if(rows && rows.get('length') > 0 && column) { + this.set('isSorting', true); + + Ember.run.later(function () { + /* + * Creating sortArray as calling getSortValue form inside the + * sort function every time would be more costly. + */ + var sortArray = rows.map(function (row) { + return { + value: column.getSortValue(row), + row: row + }; + }), + compareFunction = that.get("compareFunction"); + + sortArray.sort(function (a, b) { + var result = compareFunction(a.value, b.value); + if(descending && result) { + result = -result; + } + return result; + }); + + that.setProperties({ + _sortedRows: sortArray.map(function (record) { + return record.row; + }), + isSorting: false + }); + }); + } + else { + this.set('_sortedRows', rows); + } + }, + + startFacetedFilter: function () { + var clause = this.get("sql").createFacetClause(this.get('tableDefinition.facetConditions'), this.get("tableDefinition.columns")), + rows = this.get('_searchedRows') || [], + columns = this.get('tableDefinition.columns'), + that = this; + + if(clause && columns) { + this.set("isSearching", true); + + Ember.run.later(function () { + var result = that.get("sql").search(clause, rows, columns); + + that.setProperties({ + _facetFilteredRows: result, + isSearching: false + }); + }); + } + else { + this.set("_facetFilteredRows", rows); + } + }, + + facetedFields: Ember.computed('_searchedRows.[]', 'tableDefinition.columns', function () { + var searchedRows = this.get("_searchedRows"), + columns = this.get('tableDefinition.columns'), + fields = []; + + if(columns) { + columns.forEach(function (column) { + var facetedData; + if(column.facetType) { + facetedData = column.facetType.facetRows(column, searchedRows); + if(facetedData) { + fields.push({ + column: column, + facets: facetedData + }); + } + } + }); + } + + return fields; + }), + + pageDetails: Ember.computed("tableDefinition.rowCount", "tableDefinition.pageNum", "_facetFilteredRows.length", function () { + var tableDefinition = this.get("tableDefinition"), + + pageNum = tableDefinition.get('pageNum'), + rowCount = tableDefinition.get('rowCount'), + + startIndex = (pageNum - 1) * rowCount, + + totalRecords = this.get('_facetFilteredRows.length'); + + if(startIndex < 0) { + startIndex = 0; + } + + return { + pageNum: pageNum, + totalPages: Math.ceil(totalRecords / rowCount), + rowCount: rowCount, + + startIndex: startIndex, + + fromRecord: totalRecords ? 
startIndex + 1 : 0, + toRecord: Math.min(startIndex + rowCount, totalRecords), + totalRecords: totalRecords + }; + }), + totalPages: Ember.computed.alias("pageDetails.totalPages"), // Adding an alias for backward compatibility + + // Paginate + processedRows: Ember.computed('_facetFilteredRows.[]', 'tableDefinition.rowCount', 'tableDefinition.pageNum', function () { + var rowCount = this.get('tableDefinition.rowCount'), + startIndex = (this.get('tableDefinition.pageNum') - 1) * rowCount; + return this.get('_facetFilteredRows').slice(startIndex, startIndex + rowCount); + }), +}); diff --git a/tez-ui/src/main/webapp/app/utils/facet-types.js b/tez-ui/src/main/webapp/app/utils/facet-types.js new file mode 100644 index 0000000000..0a340bbf7d --- /dev/null +++ b/tez-ui/src/main/webapp/app/utils/facet-types.js @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Ember from 'ember'; + +var facetTypes = { + VALUES: { + componentName: "em-table-facet-panel-values", + + toClause: function (column, facetConditions) { + var values, clauses = []; + + if(facetConditions) { + if(Ember.get(facetConditions, "in.length")) { + values = facetConditions.in.map(function (value) { + value = value.replace(/'/g, "''"); + return `'${value}'`; + }); + clauses.push(`${column.id} IN (${values})`); + } + + if(Ember.get(facetConditions, "notIn.length")) { + values = facetConditions.notIn.map(function (value) { + value = value.replace(/'/g, "''"); + return `'${value}'`; + }); + clauses.push(`${column.id} NOT IN (${values})`); + } + + return clauses.join(" AND "); + } + }, + + facetRows: function (column, rows) { + var facetedDataHash = {}, + facetedDataArr = []; + + rows.forEach(function (row) { + var value = column.getSearchValue(row); + + if(typeof value === "string") { + if(!facetedDataHash[value]) { + facetedDataHash[value] = { + count: 0, + value: value + }; + facetedDataArr.push(facetedDataHash[value]); + } + facetedDataHash[value].count++; + } + + }); + + if(facetedDataArr.length) { + facetedDataArr = facetedDataArr.sort(function (a, b) { + return -(a.count - b.count); // Sort in reverse order + }); + return facetedDataArr; + } + }, + + normaliseConditions: function (conditions, data) { + if(Ember.get(conditions, "in.length") < data.length) { + return conditions; + } + } + }, +}; + +export default facetTypes; diff --git a/tez-ui/src/main/webapp/app/utils/formatters.js b/tez-ui/src/main/webapp/app/utils/formatters.js new file mode 100644 index 0000000000..d724eb85ba --- /dev/null +++ b/tez-ui/src/main/webapp/app/utils/formatters.js @@ -0,0 +1,146 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Ember from 'ember'; + +import moment from 'moment'; +import numeral from 'numeral'; + +const DEFAULT_DATE_TIMEZONE = "UTC", + DEFAULT_DATE_FORMAT = "DD MMM YYYY HH:mm:ss", + DEFAULT_NUM_FORMAT = '0,0', + DEFAULT_MEM_FORMAT = '0 b'; + +function durationFormatter(arr, value, unit) { + if(value > 0) { + if(value > 1) { + unit += 's'; + } + arr.push(value + unit); + } +} + +const DURATION_FORMATS = { + long: { + collateFunction: durationFormatter, + + year: " year", + month: " month", + day: " day", + hour: " hour", + minute: " minute", + second: " second", + millisecond: " millisecond" + }, + short: { + collateFunction: durationFormatter, + + year: " yr", + month: " mo", + day: " day", + hour: " hr", + minute: " min", + second: " sec", + millisecond: " msec" + }, + xshort: { + collateFunction: function (arr, value, unit) { + if(value > 0) { + arr.push(value + unit); + } + }, + + year: "Y", + month: "M", + day: "D", + hour: "h", + minute: "m", + second: "s", + millisecond: "ms" + } +}; + +function validateNumber(value, message) { + value = parseFloat(value); + + if(isNaN(value)) { + throw new Error(message || "Invalid number!"); + } + + return value; +} + +export default Ember.Controller.create({ + date: function (value, options) { + var date = moment.tz(value, options.valueFormat, options.valueTimeZone || DEFAULT_DATE_TIMEZONE); + + date = options.timeZone ? 
date.tz(options.timeZone) : date.local(); + date = date.format(options.format || DEFAULT_DATE_FORMAT); + + if(date === "Invalid date") { + throw new Error(date); + } + + return date; + }, + duration: function (value, options) { + var format = DURATION_FORMATS[options.format || "xshort"], + duration, + ret = []; + + value = validateNumber(value, "Invalid duration"); + + if(value === 0) { + return `0${format.millisecond}`; + } + + duration = moment.duration(value, options.valueUnit); + + format.collateFunction(ret, duration.years(), format.year); + format.collateFunction(ret, duration.months(), format.month); + format.collateFunction(ret, duration.days(), format.day); + format.collateFunction(ret, duration.hours(), format.hour); + format.collateFunction(ret, duration.minutes(), format.minute); + format.collateFunction(ret, duration.seconds(), format.second); + format.collateFunction(ret, Math.round(duration.milliseconds()), format.millisecond); + + return ret.join(" "); + }, + number: function (value, options) { + value = validateNumber(value); + return numeral(value).format(options.format || DEFAULT_NUM_FORMAT); + }, + memory: function (value) { + value = validateNumber(value, "Invalid memory"); + if(value === 0) { + return "0 B"; + } + return numeral(value).format(DEFAULT_MEM_FORMAT); + }, + json: function (value, options) { + if(value && typeof value === "object" && value.constructor === Object) { + try { + value = JSON.stringify(value, options.replacer, options.space || 4); + } + catch(err){ + Ember.Logger.error(err); + } + } + return value; + } +}); diff --git a/tez-ui/src/main/webapp/app/utils/sql.js b/tez-ui/src/main/webapp/app/utils/sql.js new file mode 100644 index 0000000000..81db3a07f5 --- /dev/null +++ b/tez-ui/src/main/webapp/app/utils/sql.js @@ -0,0 +1,94 @@ +/*global alasql*/ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import Ember from 'ember'; + +/* + * A wrapper around AlaSQL + */ +export default Ember.Object.extend({ + + constructQuery: function(clause) { + return `SELECT * FROM ? 
WHERE ${clause}`; + }, + + validateClause: function (clause, columns) { + clause = clause.toString(); + + var query = this.constructQuery(this.normaliseClause(clause, columns || [])), + valid = false; + + if(clause.match(/\W/g)) { // If it contain special characters including space + try { + alasql(query, [[{}]]); + valid = true; + } + catch(e) {} + } + + return valid; + }, + + createFacetClause: function (conditions, columns) { + if(conditions && columns) { + return columns.map(function (column) { + if(column.get("facetType")) { + return column.get("facetType.toClause")(column, conditions[Ember.get(column, "id")]); + } + }).filter(clause => clause).join(" AND "); + } + }, + + normaliseClause: function (clause, columns) { + clause = clause.toString(); + columns.forEach(function (column) { + var headerTitle = column.get("headerTitle"); + clause = clause.replace(new RegExp(`"${headerTitle}"`, "gi"), column.get("id")); + }); + return clause; + }, + + search: function (clause, rows, columns) { + clause = this.normaliseClause(clause, columns); + + // Convert into a form that alasql can digest easily + var dataSet = rows.map(function (row, index) { + var rowObj = { + _index_: index + }; + + columns.forEach(function (column) { + if(column.get("enableSearch") && row) { + rowObj[column.get("id")] = column.getSearchValue(row); + } + }); + + return rowObj; + }); + + // Search + dataSet = alasql(this.constructQuery(clause), [dataSet]); + + return dataSet.map(function (data) { + return rows[data._index_]; + }); + } + +}); diff --git a/tez-ui/src/main/webapp/app/utils/table-definition.js b/tez-ui/src/main/webapp/app/utils/table-definition.js new file mode 100644 index 0000000000..c304ec4e80 --- /dev/null +++ b/tez-ui/src/main/webapp/app/utils/table-definition.js @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import Ember from 'ember'; + +export default Ember.Object.extend({ + + recordType: "", + + // Search + enableSearch: true, + searchText: '', + searchType: 'auto', // Can be either of auto, manual, regex OR sql + _actualSearchType: "Regex", // Set from em-table-search-ui + + // Faceting + enableFaceting: false, + facetConditions: null, + minFieldsForFilter: 15, + minValuesToDisplay: 2, + facetValuesPageSize: 10, + + // Sort + enableSort: true, + sortColumnId: '', + sortOrder: '', + headerAsSortButton: false, + + // Pagination + enablePagination: true, + pageNum: 1, + rowCount: 10, + rowCountOptions: [5, 10, 25, 50, 100], + + enableColumnResize: true, + showScrollShadow: false, + + minRowsForFooter: 25, + + columns: [], + + _pageNumResetObserver: Ember.observer('searchText', 'facetConditions', 'rowCount', function () { + this.set('pageNum', 1); + }), + +}); diff --git a/tez-ui/src/main/webapp/bower-shrinkwrap.json b/tez-ui/src/main/webapp/bower-shrinkwrap.json index 357d57691a..0fb0a7c899 100644 --- a/tez-ui/src/main/webapp/bower-shrinkwrap.json +++ b/tez-ui/src/main/webapp/bower-shrinkwrap.json @@ -2,9 +2,6 @@ "https://github.com/FortAwesome/Font-Awesome.git": { "4.5.0": "593ad563a987977f14102be935d0abc2a172903e" }, - "https://github.com/Teleborder/FileSaver.js.git": { - "1.20150507.2": "b7cf622909258086bc63ad764d08fcaed780ab42" - }, "https://github.com/adamwdraper/Numeral-js.git": { "1.5.3": "f97f14bb8bab988f28f1d854525b4cfeff8ec9e1" }, @@ -26,6 +23,9 @@ "https://github.com/dockyard/qunit-notifications.git": { "0.1.1": "7a13f6dba5a340e1cb9e0b64c1c711e4d7edaca1" }, + "https://github.com/eligrey/FileSaver.js.git": { + "1.2.0": "a6d11998e279e94f2926b2a897231355dfab48ab" + }, "https://github.com/ember-cli/ember-cli-shims.git": { "0.0.6": "dcab43b58d5698690050bb9a46ead5c8663c7da1" }, @@ -48,7 +48,7 @@ "0.5.0": "74a2e9378ecf4a31a168f3049f086565c8d66814" }, "https://github.com/moment/moment.git": { - "2.12.0": "d3d7488b4d60632854181cb0a9af325d57fb3d51" + "2.29.4": "000ac1800e620f770f4eb31b5ae908f6167b0ab2" }, "https://github.com/rwjblue/ember-qunit-builds.git": { "0.4.16": "142c4066a5458bef9dfcb92b70152b9c01d79188" @@ -69,4 +69,4 @@ "https://github.com/twbs/bootstrap.git": { "3.3.6": "81df608a40bf0629a1dc08e584849bb1e43e0b7a" } -} \ No newline at end of file +} diff --git a/tez-ui/src/main/webapp/bower.json b/tez-ui/src/main/webapp/bower.json index 56a69f323e..ff939fd401 100644 --- a/tez-ui/src/main/webapp/bower.json +++ b/tez-ui/src/main/webapp/bower.json @@ -1,6 +1,7 @@ { "name": "tez-ui", "dependencies": { + "alasql": "^0.4.0", "ember": "2.2.0", "ember-cli-shims": "0.0.6", "ember-cli-test-loader": "0.2.1", @@ -15,13 +16,13 @@ "font-awesome": "4.5.0", "jquery": "2.1.4", "jquery-ui": "1.11.4", - "moment": "2.12.0", + "moment": "2.29.4", "moment-timezone": "0.5.0", "numeral": "1.5.3", "snippet-ss": "1.11.0", "jquery-mousewheel": "3.1.13", "codemirror": "5.11.0", - "file-saver.js": "1.20150507.2", - "zip-js": "1.0.0" + "zip-js": "1.0.0", + "file-saver": "v1.2.0" } } diff --git a/tez-ui/src/main/webapp/config/configs.env b/tez-ui/src/main/webapp/config/configs.js similarity index 100% rename from tez-ui/src/main/webapp/config/configs.env rename to tez-ui/src/main/webapp/config/configs.js diff --git a/tez-ui/src/main/webapp/config/default-app-conf.js b/tez-ui/src/main/webapp/config/default-app-conf.js index 388ca4ce0f..a49a3e0134 100644 --- a/tez-ui/src/main/webapp/config/default-app-conf.js +++ b/tez-ui/src/main/webapp/config/default-app-conf.js @@ -194,6 +194,10 @@ module.exports = { // 
Tez App configurations counterName: "INPUT_RECORDS_PROCESSED", counterGroupName: "org.apache.tez.common.counters.TaskCounter", }, + { + counterName: "INPUT_SPLIT_LENGTH_BYTES", + counterGroupName: "org.apache.tez.common.counters.TaskCounter", + }, { counterName: "OUTPUT_RECORDS", counterGroupName: "org.apache.tez.common.counters.TaskCounter", @@ -226,6 +230,10 @@ module.exports = { // Tez App configurations counterName: "ADDITIONAL_SPILL_COUNT", counterGroupName: "org.apache.tez.common.counters.TaskCounter", }, + { + counterName: "SHUFFLE_CHUNK_COUNT", + counterGroupName: "org.apache.tez.common.counters.TaskCounter", + }, { counterName: "SHUFFLE_BYTES", counterGroupName: "org.apache.tez.common.counters.TaskCounter", @@ -321,6 +329,14 @@ module.exports = { // Tez App configurations counterName :"NUM_FAILED_UBERTASKS", counterGroupName :"org.apache.tez.common.counters.DAGCounter", }, + { + counterName :"AM_CPU_MILLISECONDS", + counterGroupName :"org.apache.tez.common.counters.DAGCounter", + }, + { + counterName :"AM_GC_TIME_MILLIS", + counterGroupName :"org.apache.tez.common.counters.DAGCounter", + }, { counterName: "REDUCE_OUTPUT_RECORDS", diff --git a/tez-ui/src/main/webapp/config/environment.js b/tez-ui/src/main/webapp/config/environment.js index 0c755ac700..68a16cf803 100644 --- a/tez-ui/src/main/webapp/config/environment.js +++ b/tez-ui/src/main/webapp/config/environment.js @@ -39,6 +39,10 @@ module.exports = function(environment) { 'child-src': "'self' 'unsafe-inline'", 'style-src': "'self' 'unsafe-inline'", 'script-src': "'self' 'unsafe-inline'" + }, + + moment: { + includeTimezone: '2010-2020' } }; diff --git a/tez-ui/src/main/webapp/ember-cli-build.js b/tez-ui/src/main/webapp/ember-cli-build.js index 9b52210af9..f34092c52a 100644 --- a/tez-ui/src/main/webapp/ember-cli-build.js +++ b/tez-ui/src/main/webapp/ember-cli-build.js @@ -44,7 +44,7 @@ module.exports = function(defaults) { var configEnv = new Funnel('config', { srcDir: '/', - include: ['*.env'], + include: ['configs.js'], destDir: '/config' }); var zipWorker = new Funnel('bower_components/zip-js', { @@ -64,13 +64,14 @@ module.exports = function(defaults) { app.import('bower_components/more-js/dist/more.js'); - app.import('bower_components/file-saver.js/FileSaver.js'); + app.import('bower_components/file-saver/FileSaver.js'); app.import('bower_components/zip-js/WebContent/zip.js'); app.import('bower_components/codemirror/lib/codemirror.js'); app.import('bower_components/codemirror/mode/sql/sql.js'); app.import('bower_components/codemirror/mode/pig/pig.js'); app.import('bower_components/codemirror/lib/codemirror.css'); + app.import('bower_components/alasql/dist/alasql.js'); return app.toTree(new MergeTrees([configEnv, zipWorker, copyFonts])); }; diff --git a/tez-ui/src/main/webapp/package.json b/tez-ui/src/main/webapp/package.json index 513de5271a..19ffc76e3a 100644 --- a/tez-ui/src/main/webapp/package.json +++ b/tez-ui/src/main/webapp/package.json @@ -19,13 +19,13 @@ }, "repository": { "type": "git", - "url": "https://git-wip-us.apache.org/repos/asf/tez.git" + "url": "https://gitbox.apache.org/repos/asf/tez.git" }, "engines": { "node": ">= 0.10.0" }, "devDependencies": { - "bower": "1.7.7", + "bower": "1.8.4", "broccoli-asset-rev": "2.4.2", "broccoli-funnel": "1.0.1", "broccoli-merge-trees": "1.1.1", @@ -61,8 +61,18 @@ "phantomjs-prebuilt": "2.1.13" }, "dependencies": { - "em-helpers": "0.8.0", - "em-table": "0.7.2", - "em-tgraph": "0.0.10" + "em-tgraph": "0.0.14" + }, + "resolutions": { + "**/form-data/async": "2.6.4", + 
"**/mkdirp/minimist": "1.2.6", + "**/optimist/minimist": "1.2.6", + "**/jsprim/json-schema": "0.4.0", + "jsonpointer": "4.1.0", + "cryptiles": "4.1.2", + "lodash.merge": "4.6.2", + "is-my-json-valid": "2.20.3", + "debug": "2.6.9", + "qs": "6.2.4" } } diff --git a/tez-ui/src/main/webapp/tests/integration/components/column-selector-test.js b/tez-ui/src/main/webapp/tests/integration/components/column-selector-test.js index 0034059eb6..9fe7d8a2d8 100644 --- a/tez-ui/src/main/webapp/tests/integration/components/column-selector-test.js +++ b/tez-ui/src/main/webapp/tests/integration/components/column-selector-test.js @@ -85,3 +85,23 @@ test('searchText test', function(assert) { assert.equal(this.$(".select-option").text().trim(), ''); }); + +test('case-insensitive searchText test', function(assert) { + + this.setProperties({ + searchText: "test", + content: { + visibleColumnIDs: { + testID: true, + }, + columns: [Ember.Object.create({ + id: "testID", + headerTitle: "Test Column" + })] + } + }); + + this.render(hbs`{{column-selector content=content searchText=searchText}}`); + + assert.equal(this.$(".select-option").text().trim(), 'Test Column'); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-breadcrumbs-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-breadcrumbs-test.js new file mode 100644 index 0000000000..63edbc491a --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-breadcrumbs-test.js @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Ember from 'ember'; + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +moduleForComponent('em-breadcrumbs', 'Integration | Component | em breadcrumbs', { + integration: true +}); + +test('Basic creation test', function(assert) { + + // Set any properties with this.set('myProperty', 'value'); + // Handle any actions with this.on('myAction', function(val) { ... 
});" + EOL + EOL + + + this.render(hbs`{{em-breadcrumbs}}`); + + assert.equal(this.$().text().trim(), ''); + + // Template block usage:" + EOL + + this.render(hbs` + {{#em-breadcrumbs}} + template block text + {{/em-breadcrumbs}} + `); + + assert.equal(this.$().text().trim(), ''); +}); + +test('Test with one link-to item', function(assert) { + var testItems = [{ + routeName: "foo", + text: "fooText" + }], + elements; + + this.set("items", testItems); + this.render(hbs`{{em-breadcrumbs items=items}}`); + + elements = this.$("li"); + + assert.equal(elements.length, 1); + assert.equal(Ember.$(elements[0]).text().trim(), testItems[0].text); + assert.equal(elements[0].title, testItems[0].text); + assert.equal(elements[0].style.maxWidth, "100%"); +}); + +test('Test with two link-to item', function(assert) { + var testItems = [{ + routeName: "foo", + text: "fooText" + },{ + routeName: "bar", + text: "barText" + }], + elements; + + this.set("items", testItems); + this.render(hbs`{{em-breadcrumbs items=items}}`); + + elements = this.$("li"); + + assert.equal(elements.length, 2); + + assert.equal(Ember.$(elements[0]).text().trim(), testItems[0].text); + assert.equal(elements[0].title, testItems[0].text); + assert.equal(elements[0].style.maxWidth, "50%"); + + assert.equal(Ember.$(elements[1]).text().trim(), testItems[1].text); + assert.equal(elements[1].title, testItems[1].text); + assert.equal(elements[1].style.maxWidth, "50%"); +}); + +test('Test with one anchor tag item', function(assert) { + var testItems = [{ + href: "foo.bar", + text: "fooText" + }], + elements; + + this.set("items", testItems); + this.render(hbs`{{em-breadcrumbs items=items}}`); + + elements = this.$("li"); + + assert.equal(elements.length, 1); + assert.equal(Ember.$(elements[0]).text().trim(), testItems[0].text); + assert.equal(elements[0].title, testItems[0].text); + assert.equal(elements[0].style.maxWidth, "100%"); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-progress-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-progress-test.js new file mode 100644 index 0000000000..8fcdfaf1d0 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-progress-test.js @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +moduleForComponent('em-progress', 'Integration | Component | em progress', { + integration: true +}); + +test('It renders', function(assert) { + + // Set any properties with this.set('myProperty', 'value'); + // Handle any actions with this.on('myAction', function(val) { ... 
});" + EOL + EOL + + + this.render(hbs`{{em-progress}}`); + + assert.equal(this.$().text().trim(), '0%'); + + this.render(hbs`{{#em-progress}}{{/em-progress}}`); + assert.equal(this.$().text().trim(), '0%'); +}); + +test('With a specific value', function(assert) { + this.render(hbs`{{em-progress value=0.5}}`); + assert.equal(this.$().text().trim(), '50%'); +}); + +test('Custom valueMin & valueMax', function(assert) { + this.render(hbs`{{em-progress value=15 valueMin=10 valueMax=20}}`); + assert.equal(this.$().text().trim(), '50%'); + + assert.notOk(this.$('.striped')[0], "Striped class added"); +}); + +test('Check for stripes & animation while in progress', function(assert) { + this.render(hbs`{{em-progress value=0.5 striped=true}}`); + + assert.equal(this.$().text().trim(), '50%'); + assert.ok(this.$('.striped')[0], "Striped class added"); + assert.ok(this.$('.animated')[0], "Animated class should be added!"); +}); + +test('Check for stripes & animation while starting', function(assert) { + this.render(hbs`{{em-progress value=0 striped=true}}`); + + assert.equal(this.$().text().trim(), '0%'); + assert.ok(this.$('.striped')[0], "Striped class added"); + assert.ok(!this.$('.animated')[0], "Animated class shouldn't be added!"); +}); + +test('Check for stripes & animation on completion', function(assert) { + this.render(hbs`{{em-progress value=1 striped=true}}`); + + assert.equal(this.$().text().trim(), '100%'); + assert.ok(this.$('.striped')[0], "Striped class added"); + assert.ok(!this.$('.animated')[0], "Animated class shouldn't be added!"); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-table-cell-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-table-cell-test.js new file mode 100644 index 0000000000..ccf535884e --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-table-cell-test.js @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import Ember from 'ember'; + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +import ColumnDefinition from '../../../utils/column-definition'; + +moduleForComponent('em-table-cell', 'Integration | Component | em table cell', { + integration: true +}); + +test('Basic rendering test', function(assert) { + var columnDefinition = ColumnDefinition.create({ + id: 'id', + contentPath: 'keyA' + }), + row = Ember.Object.create({ + keyA: 'valueA', + keyB: 'valueB' + }); + + this.set('columnDefinition', columnDefinition); + this.set('row', row); + this.render(hbs`{{em-table-cell columnDefinition=columnDefinition row=row}}`); + + assert.equal(this.$().text().trim(), 'valueA'); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-table-column-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-table-column-test.js new file mode 100644 index 0000000000..96eff7af2e --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-table-column-test.js @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +moduleForComponent('em-table-column', 'Integration | Component | em table column', { + integration: true +}); + +test('Basic rendering test', function(assert) { + this.render(hbs`{{em-table-column}}`); + + assert.equal(this.$().text().trim(), ''); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-table-facet-panel-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-table-facet-panel-test.js new file mode 100644 index 0000000000..cc0f1f0741 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-table-facet-panel-test.js @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
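+ * + * Note on the assertions below: with no facets bound, the panel falls back + * to its "Not Available!" message; the tests strip every space and newline + * via text().replace(/\n|\r\n|\r| /g, ''), so the expected text reads + * 'NotAvailable!'.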
+ */ + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +moduleForComponent('em-table-facet-panel', 'Integration | Component | em table facet panel', { + integration: true +}); + +test('Basic render test', function(assert) { + + // Set any properties with this.set('myProperty', 'value'); + // Handle any actions with this.on('myAction', function(val) { ... }); + + this.render(hbs`{{em-table-facet-panel}}`); + + assert.equal(this.$().text().replace(/\n|\r\n|\r| /g, '').trim(), 'NotAvailable!'); + + // Template block usage: + this.render(hbs` + {{#em-table-facet-panel}} + template block text + {{/em-table-facet-panel}} + `); + + assert.equal(this.$().text().replace(/\n|\r\n|\r| /g, '').trim(), 'NotAvailable!'); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-table-facet-panel-values-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-table-facet-panel-values-test.js new file mode 100644 index 0000000000..f401a7da6e --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-table-facet-panel-values-test.js @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +moduleForComponent('em-table-facet-panel-values', 'Integration | Component | em table facet panel values', { + integration: true +}); + +test('Basic render test', function(assert) { + + // Set any properties with this.set('myProperty', 'value'); + // Handle any actions with this.on('myAction', function(val) { ... }); + + this.set("tmpFacetConditions", {}); + this.render(hbs`{{em-table-facet-panel-values tmpFacetConditions=tmpFacetConditions}}`); + + assert.ok(this.$().text().trim()); + + // Template block usage: + this.render(hbs` + {{#em-table-facet-panel-values tmpFacetConditions=tmpFacetConditions}} + template block text + {{/em-table-facet-panel-values}} + `); + + assert.ok(this.$().text().trim()); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-table-header-cell-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-table-header-cell-test.js new file mode 100644 index 0000000000..0c502ce9df --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-table-header-cell-test.js @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +moduleForComponent('em-table-header-cell', 'Integration | Component | em table header cell', { + integration: true +}); + +test('Basic rendering test', function(assert) { + this.render(hbs`{{em-table-header-cell}}`); + + assert.equal(this.$().text().trim(), ''); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-table-linked-cell-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-table-linked-cell-test.js new file mode 100644 index 0000000000..7553c41014 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-table-linked-cell-test.js @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +moduleForComponent('em-table-linked-cell', 'Integration | Component | em table linked cell', { + integration: true +}); + +test('Basic rendering test', function(assert) { + this.render(hbs`{{em-table-linked-cell}}`); + + assert.equal(this.$().text().trim(), 'Not Available!'); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-table-pagination-ui-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-table-pagination-ui-test.js new file mode 100644 index 0000000000..0333d0cf11 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-table-pagination-ui-test.js @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Ember from 'ember'; + +import DataProcessor from '../../../utils/data-processor'; +import TableDefinition from '../../../utils/table-definition'; + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +moduleForComponent('em-table-pagination-ui', 'Integration | Component | em table pagination ui', { + integration: true +}); + +test('Basic rendering test', function(assert) { + var customRowCount = 25, + definition = TableDefinition.create({ + rowCount: customRowCount + }), + processor; + + Ember.run(function () { + processor = DataProcessor.create({ + tableDefinition: definition, + rows: Ember.A([Ember.Object.create()]) + }); + }); + + this.set('definition', definition); + this.set('processor', processor); + this.render(hbs`{{em-table-pagination-ui tableDefinition=definition dataProcessor=processor}}`); + + var paginationItems = this.$('li'); + assert.equal(paginationItems.length, 1); + assert.equal($(paginationItems[0]).text().trim(), "1"); + + var rowSelection = this.$('select')[0]; + assert.ok(rowSelection); + assert.equal($(rowSelection).val(), customRowCount); +}); + +test('No data test', function(assert) { + var customRowCount = 2, + definition = TableDefinition.create({ + rowCount: customRowCount + }), + processor; + + Ember.run(function () { + processor = DataProcessor.create({ + tableDefinition: definition, + rows: Ember.A() + }); + }); + + this.set('definition', definition); + this.set('processor', processor); + this.render(hbs`{{em-table-pagination-ui tableDefinition=definition dataProcessor=processor}}`); + + var paginationItems = this.$('li'); + assert.equal(paginationItems.length, 0); +}); + +test('Multiple page test; without first & last', function(assert) { + var customRowCount = 2, + definition = TableDefinition.create({ + rowCount: customRowCount + }), + processor; + + Ember.run(function () { + processor = DataProcessor.create({ + tableDefinition: definition, + rows: Ember.A([Ember.Object.create(), Ember.Object.create(), Ember.Object.create()]) + }); + }); + + this.set('definition', definition); + this.set('processor', processor); + this.render(hbs`{{em-table-pagination-ui tableDefinition=definition dataProcessor=processor}}`); + + var paginationItems = this.$('li'); + assert.equal(paginationItems.length, 2); + assert.equal($(paginationItems[0]).text().trim(), "1"); + assert.equal($(paginationItems[1]).text().trim(), "2"); +}); + +test('Display last test', function(assert) { + var customRowCount = 5, + definition = TableDefinition.create({ + rowCount: customRowCount + }), + processor, + rows = []; + + for(var i = 0; i < 100; i++) { + rows.push(Ember.Object.create()); + } + + Ember.run(function () { + processor = DataProcessor.create({ + tableDefinition: definition, + rows: Ember.A(rows) + }); + }); + + this.set('definition', definition); + this.set('processor', processor); + this.render(hbs`{{em-table-pagination-ui tableDefinition=definition dataProcessor=processor}}`); + + var paginationItems = this.$('li'); + assert.equal(paginationItems.length, 6); + 
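// Expected window (sanity check): 100 rows at rowCount 5 gives 20 pages; + // the first window lists pages 1-5 plus a "Last - 20" jump cell, hence 6 items. + 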
assert.equal($(paginationItems[0]).text().trim(), "1"); + assert.equal($(paginationItems[1]).text().trim(), "2"); + assert.equal($(paginationItems[2]).text().trim(), "3"); + assert.equal($(paginationItems[3]).text().trim(), "4"); + assert.equal($(paginationItems[4]).text().trim(), "5"); + assert.equal($(paginationItems[5]).text().trim(), "Last - 20"); +}); + +test('Display first test', function(assert) { + var customRowCount = 5, + definition = TableDefinition.create({ + pageNum: 20, + rowCount: customRowCount + }), + processor, + rows = []; + + for(var i = 0; i < 100; i++) { + rows.push(Ember.Object.create()); + } + + Ember.run(function () { + processor = DataProcessor.create({ + tableDefinition: definition, + rows: Ember.A(rows) + }); + }); + + this.set('definition', definition); + this.set('processor', processor); + this.render(hbs`{{em-table-pagination-ui tableDefinition=definition dataProcessor=processor}}`); + + var paginationItems = this.$('li'); + assert.equal(paginationItems.length, 6); + assert.equal($(paginationItems[0]).text().trim(), "First"); + assert.equal($(paginationItems[1]).text().trim(), "16"); + assert.equal($(paginationItems[2]).text().trim(), "17"); + assert.equal($(paginationItems[3]).text().trim(), "18"); + assert.equal($(paginationItems[4]).text().trim(), "19"); + assert.equal($(paginationItems[5]).text().trim(), "20"); +}); + +test('Display first & last test', function(assert) { + var customRowCount = 5, + definition = TableDefinition.create({ + pageNum: 10, + rowCount: customRowCount + }), + processor, + rows = []; + + for(var i = 0; i < 100; i++) { + rows.push(Ember.Object.create()); + } + + Ember.run(function () { + processor = DataProcessor.create({ + tableDefinition: definition, + rows: Ember.A(rows) + }); + }); + + this.set('definition', definition); + this.set('processor', processor); + this.render(hbs`{{em-table-pagination-ui tableDefinition=definition dataProcessor=processor}}`); + + var paginationItems = this.$('li'); + assert.equal(paginationItems.length, 7); + assert.equal($(paginationItems[0]).text().trim(), "First"); + assert.equal($(paginationItems[1]).text().trim(), "8"); + assert.equal($(paginationItems[2]).text().trim(), "9"); + assert.equal($(paginationItems[3]).text().trim(), "10"); + assert.equal($(paginationItems[4]).text().trim(), "11"); + assert.equal($(paginationItems[5]).text().trim(), "12"); + assert.equal($(paginationItems[6]).text().trim(), "Last - 20"); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-table-progress-cell-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-table-progress-cell-test.js new file mode 100644 index 0000000000..b7eced31d6 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-table-progress-cell-test.js @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +moduleForComponent('em-table-progress-cell', 'Integration | Component | em table progress cell', { + integration: true +}); + +test('Basic creation test', function(assert) { + + // Set any properties with this.set('myProperty', 'value'); + // Handle any actions with this.on('myAction', function(val) { ... }); + + this.render(hbs`{{em-table-progress-cell content=0.5}}`); + + assert.equal(this.$().text().trim(), '50%'); + + // Template block usage: + this.render(hbs` + {{#em-table-progress-cell content=0.5}} + template block text + {{/em-table-progress-cell}} + `); + + assert.equal(this.$().text().trim(), '50%'); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-table-search-ui-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-table-search-ui-test.js new file mode 100644 index 0000000000..0cd2bbca15 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-table-search-ui-test.js @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +moduleForComponent('em-table-search-ui', 'Integration | Component | em table search ui', { + integration: true +}); + +test('Basic rendering test', function(assert) { + this.render(hbs`{{em-table-search-ui}}`); + + assert.equal(this.$().text().trim(), 'Search'); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/em-table-test.js b/tez-ui/src/main/webapp/tests/integration/components/em-table-test.js new file mode 100644 index 0000000000..96baf79fa9 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/components/em-table-test.js @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
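+ * + * Message fallbacks sketched by the tests below (assuming the default + * messages): no columns gives "No columns available!", columns without rows + * gives "No records available!", and a definition with recordType "vertex" + * pluralises the message to "No vertices available!".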
+ */ + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +import TableDefinition from '../../../utils/table-definition'; +import ColumnDefinition from '../../../utils/column-definition'; + +moduleForComponent('em-table', 'Integration | Component | em table', { + integration: true +}); + +test('Basic rendering test', function(assert) { + this.render(hbs`{{em-table}}`); + + assert.equal(this.$('.table-message').text().trim(), 'No columns available!'); +}); + +test('Records missing test', function(assert) { + var definition = TableDefinition.create({ + recordType: "vertex" + }); + + this.set("columns", [ColumnDefinition.fillerColumn]); + + this.render(hbs`{{em-table columns=columns}}`); + assert.equal(this.$('.table-message').text().trim(), 'No records available!'); + + this.set("definition", definition); + this.render(hbs`{{em-table columns=columns definition=definition}}`); + assert.equal(this.$('.table-message').text().trim(), 'No vertices available!'); +}); diff --git a/tez-ui/src/main/webapp/tests/integration/components/home-table-controls-test.js b/tez-ui/src/main/webapp/tests/integration/components/home-table-controls-test.js index ccce2dbe29..c35c339e3c 100644 --- a/tez-ui/src/main/webapp/tests/integration/components/home-table-controls-test.js +++ b/tez-ui/src/main/webapp/tests/integration/components/home-table-controls-test.js @@ -66,7 +66,7 @@ test('countersLoaded test', function(assert) { })] }); this.render(hbs`{{home-table-controls dataProcessor=dataProcessor}}`); - assert.equal(this.$().find("button").attr("class").split(" ").indexOf("no-visible"), 2); + assert.equal(this.$().find("button").attr("class").split(" ").indexOf("no-visible"), -1); this.set("dataProcessor", { processedRows: [Ember.Object.create({ diff --git a/tez-ui/src/main/webapp/tests/integration/em-table-status-cell-test.js b/tez-ui/src/main/webapp/tests/integration/em-table-status-cell-test.js new file mode 100644 index 0000000000..31483395e9 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/integration/em-table-status-cell-test.js @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
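+ * + * Assumed behaviour under test: with no status bound, the cell renders its + * "Not Available!" fallback in both inline and block form, e.g. + * {{em-table-status-cell}} and {{#em-table-status-cell}}{{/em-table-status-cell}}.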
+ */ + +import { moduleForComponent, test } from 'ember-qunit'; +import hbs from 'htmlbars-inline-precompile'; + +moduleForComponent('em-table-status-cell', 'Integration | Component | em table status cell', { + integration: true +}); + +test('Basic creation test', function(assert) { + + this.render(hbs`{{em-table-status-cell}}`); + + assert.equal(this.$().text().trim(), 'Not Available!'); + + // Template block usage: + this.render(hbs` + {{#em-table-status-cell}} + template block text + {{/em-table-status-cell}} + `); + + assert.equal(this.$().text().trim(), 'Not Available!'); +}); diff --git a/tez-ui/src/main/webapp/tests/unit/helpers/txt-test.js b/tez-ui/src/main/webapp/tests/unit/helpers/txt-test.js new file mode 100644 index 0000000000..18d3de7ace --- /dev/null +++ b/tez-ui/src/main/webapp/tests/unit/helpers/txt-test.js @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { txt } from '../../../helpers/txt'; +import { module, test } from 'qunit'; + +module('Unit | Helper | txt'); + +test('txt: created', function(assert) { + assert.ok(txt); +}); + +test('txt: String', function(assert) { + assert.equal(txt(["Abc"], {}), "Abc"); + assert.equal(txt(null, {}).string, '<em>Not Available!</em>'); +}); + +test('txt: String - success', function(assert) { + assert.equal(txt(["Abc"], {}), "Abc"); + assert.equal(txt(null, {}).string, '<em>Not Available!</em>'); + assert.equal(txt([null], {}).string, '<em>Not Available!</em>'); +}); + +test('txt: String - error', function(assert) { + var obj = {}; + + obj.toString = null; + assert.equal(txt([obj], {}).string, '<em>Invalid Data!</em>
'); +}); + +test('txt: json', function(assert) { + var obj = { + x: 1, + y: 2 + }; + assert.equal(txt([obj], { + type: "json", + }).string, '{\n    "x": 1,\n    "y": 2\n}'); +}); + +test('txt: error', function(assert) { + var err = new Error("testError"); + assert.equal(txt([err], {}).string, '<em>testError</em>'); +}); diff --git a/tez-ui/src/main/webapp/tests/unit/models/dag-test.js b/tez-ui/src/main/webapp/tests/unit/models/dag-test.js index 468c87f0e5..7be7def930 100644 --- a/tez-ui/src/main/webapp/tests/unit/models/dag-test.js +++ b/tez-ui/src/main/webapp/tests/unit/models/dag-test.js @@ -62,6 +62,8 @@ test('Basic creation test', function(assert) { assert.ok(model.info); assert.ok(model.amWsVersion); + assert.ok(model.failedTaskAttempts); + assert.ok(model.finalStatus); }); test('app loadType test', function(assert) { @@ -83,6 +85,20 @@ test('app loadType test', function(assert) { assert.equal(loadType(record), undefined); }); +test('status test', function(assert) { + let model = this.subject(); + + Ember.run(function () { + model.set("status", "SUCCEEDED"); + assert.equal(model.get("status"), "SUCCEEDED"); + assert.equal(model.get("finalStatus"), "SUCCEEDED"); + + model.set("failedTaskAttempts", 1); + assert.equal(model.get("status"), "SUCCEEDED"); + assert.equal(model.get("finalStatus"), "SUCCEEDED_WITH_FAILURES"); + }); +}); + test('queue test', function(assert) { let model = this.subject(), queueName = "queueName", @@ -138,4 +154,4 @@ test('vertices, edges & vertexGroups test', function(assert) { assert.equal(model.get("edges"), testEdges); assert.equal(model.get("vertexGroups"), testVertexGroups); }); -}); \ No newline at end of file +}); diff --git a/tez-ui/src/main/webapp/tests/unit/routes/server-side-ops-test.js b/tez-ui/src/main/webapp/tests/unit/routes/server-side-ops-test.js index 59e04f75ba..055da433eb 100644 --- a/tez-ui/src/main/webapp/tests/unit/routes/server-side-ops-test.js +++ b/tez-ui/src/main/webapp/tests/unit/routes/server-side-ops-test.js @@ -136,7 +136,7 @@ test('loadNewPage test', function(assert) { let currentQuery = { val: {} }, - data = [], + data = {content: []}, fromId = "id1", route = this.subject({ controller: Ember.Object.create(), @@ -144,7 +144,7 @@ test('loadNewPage test', function(assert) { fromId: fromId, loadedValue: { pushObjects: function (objs) { - assert.equal(data, objs); + assert.equal(data.content, objs); } }, load: function (value, query) { diff --git a/tez-ui/src/main/webapp/tests/unit/serializers/dag-test.js b/tez-ui/src/main/webapp/tests/unit/serializers/dag-test.js index f57d1af378..80f41b6bbb 100644 --- a/tez-ui/src/main/webapp/tests/unit/serializers/dag-test.js +++ b/tez-ui/src/main/webapp/tests/unit/serializers/dag-test.js @@ -36,7 +36,7 @@ test('Basic creation test', function(assert) { assert.ok(serializer.maps.containerLogs); assert.ok(serializer.maps.vertexIdNameMap); - assert.equal(Object.keys(serializer.get("maps")).length, 12 + 7); //12 own & 9 inherited (2 overwritten) + assert.equal(Object.keys(serializer.get("maps")).length, 13 + 7); //13 own & 9 inherited (2 overwritten) }); test('atsStatus test', function(assert) { @@ -110,8 +110,8 @@ test('containerLogs test', function(assert) { }), [], "No logs"); assert.deepEqual(mapper({ - otherinfo: {inProgressLogsURL_1: "foo", inProgressLogsURL_2: "bar"}, - }), [{text: "1", href: "http://foo"}, {text: "2", href: "http://bar"}], "2 logs"); + otherinfo: {inProgressLogsURL_1: "http://foo", inProgressLogsURL_2: "https://bar"}, + }), [{text: "1", href: "http://foo"}, {text: "2", href: 
"https://bar"}], "2 logs"); }); test('vertexIdNameMap test', function(assert) { diff --git a/tez-ui/src/main/webapp/tests/unit/services/hosts-test.js b/tez-ui/src/main/webapp/tests/unit/services/hosts-test.js index 026f21b304..afa527cb03 100644 --- a/tez-ui/src/main/webapp/tests/unit/services/hosts-test.js +++ b/tez-ui/src/main/webapp/tests/unit/services/hosts-test.js @@ -36,11 +36,11 @@ test('Test correctProtocol', function(assert) { // Correction assert.equal(service.correctProtocol("localhost:8088"), "http://localhost:8088"); - assert.equal(service.correctProtocol("https://localhost:8088"), "http://localhost:8088"); + assert.equal(service.correctProtocol("https://localhost:8088"), "https://localhost:8088"); assert.equal(service.correctProtocol("file://localhost:8088"), "http://localhost:8088"); assert.equal(service.correctProtocol("localhost:8088", "http:"), "http://localhost:8088"); - assert.equal(service.correctProtocol("https://localhost:8088", "http:"), "http://localhost:8088"); + assert.equal(service.correctProtocol("https://localhost:8088", "http:"), "https://localhost:8088"); assert.equal(service.correctProtocol("file://localhost:8088", "http:"), "http://localhost:8088"); assert.equal(service.correctProtocol("localhost:8088", "https:"), "https://localhost:8088"); @@ -72,6 +72,6 @@ test('Test host URLs with ENV set', function(assert) { rm: "https://localhost:4444" } }; - assert.equal(service.get("timeline"), "http://localhost:3333"); - assert.equal(service.get("rm"), "http://localhost:4444"); + assert.equal(service.get("timeline"), "https://localhost:3333"); + assert.equal(service.get("rm"), "https://localhost:4444"); }); diff --git a/tez-ui/src/main/webapp/tests/unit/utils/column-definition-test.js b/tez-ui/src/main/webapp/tests/unit/utils/column-definition-test.js new file mode 100644 index 0000000000..5ee9a49023 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/unit/utils/column-definition-test.js @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import Ember from 'ember'; +import ColumnDefinition from '../../../utils/column-definition'; +import { module, test } from 'qunit'; + +module('Unit | Utility | column definition'); + +test('Class creation test', function(assert) { + assert.ok(ColumnDefinition); + + assert.ok(ColumnDefinition.make); + assert.ok(ColumnDefinition.makeFromModel); +}); + +test('make - Instance creation test', function(assert) { + + var definition = ColumnDefinition.make({ + id: "testId" + }); + var definitions = ColumnDefinition.make([{ + id: "testId 1" + },{ + id: "testId 2" + }]); + + // Single + assert.ok(definition); + + // Multiple + assert.ok(definitions); + assert.ok(Array.isArray(definitions)); + assert.equal(definitions.length, 2); +}); + +test('make - Instance creation failure test', function(assert) { + assert.throws(function () { + ColumnDefinition.make({}); + }); +}); + +test('makeFromModel test', function(assert) { + var attributes = Ember.Map.create(), + DummyModel = Ember.Object.create({ + attributes: attributes + }), + getCellContent = function () {}, + columns; + + attributes.set("attr1", "path1"); + attributes.set("attr2", "path2"); + attributes.set("attr3", "path3"); + + columns = ColumnDefinition.makeFromModel(DummyModel, { + getCellContent: getCellContent + }); + + assert.equal(columns.length, 3); + assert.equal(columns[0].id, "attr1"); + assert.equal(columns[0].headerTitle, "Attr1"); + assert.equal(columns[0].contentPath, "attr1"); + assert.equal(columns[0].getCellContent, getCellContent); +}); + +test('Instance test', function(assert) { + var definition = ColumnDefinition.make({ + id: "testId", + contentPath: "a.b" + }); + var data = Ember.Object.create({ + a: { + b: 42 + } + }); + + assert.ok(definition.getCellContent); + assert.ok(definition.getSearchValue); + assert.ok(definition.getSortValue); + + assert.equal(definition.id, "testId"); + assert.equal(definition.headerTitle, "Not Available!"); + assert.equal(definition.minWidth, "150px"); + assert.equal(definition.contentPath, "a.b"); + + assert.equal(definition.getCellContent(data), 42); + assert.equal(definition.getSearchValue(data), 42); + assert.equal(definition.getSortValue(data), 42); +}); diff --git a/tez-ui/src/main/webapp/tests/unit/utils/data-processor-test.js b/tez-ui/src/main/webapp/tests/unit/utils/data-processor-test.js new file mode 100644 index 0000000000..58f52dd013 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/unit/utils/data-processor-test.js @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
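+ * + * Ordering convention asserted below: compareFunction sorts null and + * undefined lowest, e.g. compareFunction(null, 2) === -1 and + * compareFunction(1, null) === 1, while equal scalars compare as 0.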
+ */ + +import Ember from 'ember'; + +import DataProcessor from '../../../utils/data-processor'; +import ColumnDefinition from '../../../utils/column-definition'; +import { module, test } from 'qunit'; + +module('Unit | Utility | data processor'); + +test('Class creation test', function(assert) { + assert.ok(DataProcessor); +}); + +test('Instance default test', function(assert) { + var processor; + + Ember.run(function () { + processor = DataProcessor.create({ + tableDefinition: Ember.Object.create(), + startSearch: function () { + // Test Search + }, + startSort: function () { + // Test Sort + } + }); + }); + + assert.ok(processor); + assert.equal(processor.get('isSorting'), false); + assert.equal(processor.get('isSearching'), false); + + assert.ok(processor._searchObserver); + assert.ok(processor._sortObserver); + assert.ok(processor.startSearch); + assert.ok(processor.startSort); + assert.ok(processor.compareFunction); + assert.ok(processor.totalPages); + assert.ok(processor.processedRows); +}); + +test('compareFunction test', function(assert) { + var processor; + + Ember.run(function () { + processor = DataProcessor.create({ + tableDefinition: Ember.Object.create(), + startSearch: function () {}, + startSort: function () {} + }); + }); + + assert.equal(processor.compareFunction(1, 1), 0); + assert.equal(processor.compareFunction(1, 2), -1); + assert.equal(processor.compareFunction(2, 1), 1); + + assert.equal(processor.compareFunction("a", "a"), 0); + assert.equal(processor.compareFunction("a", "b"), -1); + assert.equal(processor.compareFunction("b", "a"), 1); + + assert.equal(processor.compareFunction(null, null), -1); + assert.equal(processor.compareFunction(1, null), 1); + assert.equal(processor.compareFunction(null, 2), -1); + assert.equal(processor.compareFunction("a", null), 1); + assert.equal(processor.compareFunction(null, "b"), -1); + + assert.equal(processor.compareFunction(undefined, undefined), -1); + assert.equal(processor.compareFunction(1, undefined), 1); + assert.equal(processor.compareFunction(undefined, 2), -1); + assert.equal(processor.compareFunction("a", undefined), 1); + assert.equal(processor.compareFunction(undefined, "b"), -1); +}); + +test('startSearch test', function(assert) { + var processor, + runLater = Ember.run.later; + + assert.expect(3); + + Ember.run.later = function (callback) { + callback(); + assert.equal(processor.get("_searchedRows.length"), 2); + assert.equal(processor.get("_searchedRows.0.foo"), "Foo1"); + assert.equal(processor.get("_searchedRows.1.foo"), "Foo12"); + + Ember.run.later = runLater; // Reset + }; + + Ember.run(function () { + processor = DataProcessor.create({ + tableDefinition: Ember.Object.create({ + searchText: "foo1", + columns: [ColumnDefinition.create({ + id: "foo", + contentPath: 'foo' + }), ColumnDefinition.create({ + id: "bar", + contentPath: 'bar' + })] + }), + startSort: function () { + // Test Sort + }, + _sortedRows: [Ember.Object.create({ + foo: "Foo1", + bar: "Bar1" + }), Ember.Object.create({ + foo: "Foo12", + bar: "Bar2" + }), Ember.Object.create({ + foo: "Foo3", + bar: "Bar3" + }), Ember.Object.create({ + foo: "Foo4", + bar: "Bar4" + })], + }); + }); + +}); diff --git a/tez-ui/src/main/webapp/tests/unit/utils/facet-types-test.js b/tez-ui/src/main/webapp/tests/unit/utils/facet-types-test.js new file mode 100644 index 0000000000..f3af95249f --- /dev/null +++ b/tez-ui/src/main/webapp/tests/unit/utils/facet-types-test.js @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import facetTypes from '../../../utils/facet-types'; +import { module, test } from 'qunit'; + +module('Unit | Utility | facet types'); + +test('Basic creation test', function(assert) { + assert.ok(facetTypes); + + assert.ok(facetTypes.VALUES); +}); diff --git a/tez-ui/src/main/webapp/tests/unit/utils/formatters-test.js b/tez-ui/src/main/webapp/tests/unit/utils/formatters-test.js new file mode 100644 index 0000000000..4ecc14309c --- /dev/null +++ b/tez-ui/src/main/webapp/tests/unit/utils/formatters-test.js @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
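+ * + * Duration formats exercised below: 66000 ms is expected to render as + * "1 minute 6 seconds" (long), "1 min 6 secs" (short) and "1m 6s" (xshort, + * the default); fractional milliseconds round to the nearest integer.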
+ */ + +import fmts from '../../../utils/formatters'; +import { module, test } from 'qunit'; + +import Ember from 'ember'; + +module('Unit | Utility | formatters'); + +test('Formatter functions created', function(assert) { + assert.ok(fmts); + + assert.ok(fmts.date); + assert.ok(fmts.duration); + assert.ok(fmts.number); + assert.ok(fmts.memory); +}); + +test('duration', function(assert) { + var options = { + format: "long" + }; + assert.equal(fmts.duration(0, options), "0 millisecond"); + assert.equal(fmts.duration(1, options), "1 millisecond"); + assert.equal(fmts.duration(60, options), "60 milliseconds"); + assert.equal(fmts.duration(6000, options), "6 seconds"); + assert.equal(fmts.duration(66000, options), "1 minute 6 seconds"); + assert.equal(fmts.duration(666000, options), "11 minutes 6 seconds"); + assert.equal(fmts.duration(6666000, options), "1 hour 51 minutes 6 seconds"); + assert.equal(fmts.duration(66666000, options), "18 hours 31 minutes 6 seconds"); + + options = { + format: "short" + }; // By default format = short + assert.equal(fmts.duration(0, options), "0 msec"); + assert.equal(fmts.duration(60, options), "60 msecs"); + assert.equal(fmts.duration(6000, options), "6 secs"); + assert.equal(fmts.duration(66000, options), "1 min 6 secs"); + assert.equal(fmts.duration(666000, options), "11 mins 6 secs"); + assert.equal(fmts.duration(6666000, options), "1 hr 51 mins 6 secs"); + assert.equal(fmts.duration(66666000, options), "18 hrs 31 mins 6 secs"); + + assert.equal(fmts.duration(60.4, options), "60 msecs"); + assert.equal(fmts.duration(60.6, options), "61 msecs"); + + options = {}; // By default format = xshort + assert.equal(fmts.duration(0, options), "0ms"); + assert.equal(fmts.duration(60, options), "60ms"); + assert.equal(fmts.duration(6000, options), "6s"); + assert.equal(fmts.duration(66000, options), "1m 6s"); + assert.equal(fmts.duration(666000, options), "11m 6s"); + assert.equal(fmts.duration(6666000, options), "1h 51m 6s"); + assert.equal(fmts.duration(66666000, options), "18h 31m 6s"); +}); + +test('number', function(assert) { + assert.equal(fmts.number(6000, {}), "6,000"); + assert.equal(fmts.number(6000000, {}), "6,000,000"); +}); + +test('memory', function(assert) { + assert.equal(fmts.memory(0, {}), "0 B"); + assert.equal(fmts.memory(600, {}), "600 B"); + assert.equal(fmts.memory(1024, {}), "1 KB"); + assert.equal(fmts.memory(1024 * 1024, {}), "1 MB"); + assert.equal(fmts.memory(1024 * 1024 * 1024, {}), "1 GB"); + assert.equal(fmts.memory(1024 * 1024 * 1024 * 1024, {}), "1 TB"); +}); + +test('json', function(assert) { + var str = "testString", + complexObj = Ember.Object.create(); + + assert.equal(fmts.json(str, {}), str); + assert.equal(fmts.json(complexObj, {}), complexObj); + + assert.equal(fmts.json(null, {}), null); + assert.equal(fmts.json(undefined, {}), undefined); + + assert.equal(fmts.json({x: 1}, {}), '{\n "x": 1\n}'); + assert.equal(fmts.json({x: 1, y: 2}, {space: 1}), '{\n "x": 1,\n "y": 2\n}'); + assert.equal(fmts.json({x: 1, y: {z: 3}}, {space: 1}), '{\n "x": 1,\n "y": {\n "z": 3\n }\n}'); +}); diff --git a/tez-ui/src/main/webapp/tests/unit/utils/sql-test.js b/tez-ui/src/main/webapp/tests/unit/utils/sql-test.js new file mode 100644 index 0000000000..7aed218801 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/unit/utils/sql-test.js @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import SQL from '../../../utils/sql'; +import ColumnDefinition from '../../../utils/column-definition'; +import { module, test } from 'qunit'; + +module('Unit | Utility | sql'); + +test('Class creation test', function(assert) { + var sql = SQL.create(); + + assert.ok(sql.constructQuery); + assert.ok(sql.validateClause); + assert.ok(sql.normaliseClause); + assert.ok(sql.search); +}); + +test('constructQuery test', function(assert) { + var sql = SQL.create(); + + assert.equal(sql.constructQuery("x = y"), "SELECT * FROM ? WHERE x = y"); +}); + +test('validateClause test', function(assert) { + var sql = SQL.create(); + + assert.ok(sql.validateClause("x = y")); + assert.ok(sql.validateClause("x = y AND a = b")); + assert.ok(sql.validateClause("(x = y OR y = z) AND a = b")); + assert.ok(sql.validateClause("x BETWEEN 1 AND 2")); + + assert.notOk(sql.validateClause("foo")); + assert.notOk(sql.validateClause("foo bar")); + assert.notOk(sql.validateClause("^[a-z0-9_-]{3,16}$")); + assert.notOk(sql.validateClause("^[a-z0-9_-]{6,18}$")); + assert.notOk(sql.validateClause("^[a-z0-9-]+$")); +}); + +test('normaliseClause test', function(assert) { + var sql = SQL.create(), + column = ColumnDefinition.create({ + headerTitle: "Column Header", + id: "columnID", + contentPath: "col" + }); + + assert.equal(sql.normaliseClause('"Column Header" = value', [column]), "columnID = value"); + assert.equal(sql.normaliseClause('"Another Column Header" = value', [column]), '"Another Column Header" = value'); +}); + +test('search test', function(assert) { + var sql = SQL.create(), + data = [{ + colA: "x1", + colB: "y1" + }, { + colA: "x2", + colB: "y2" + }, { + colA: "x1", + colB: "y3" + }], + columns = [ColumnDefinition.create({ + headerTitle: "Column A", + id: "colA", + contentPath: "colA" + })]; + + var result = sql.search('"Column A" = "x1"', data, columns); + + assert.equal(result.length, 2); + assert.equal(result[0].colB, "y1"); + assert.equal(result[1].colB, "y3"); +}); diff --git a/tez-ui/src/main/webapp/tests/unit/utils/table-definition-test.js b/tez-ui/src/main/webapp/tests/unit/utils/table-definition-test.js new file mode 100644 index 0000000000..234994b192 --- /dev/null +++ b/tez-ui/src/main/webapp/tests/unit/utils/table-definition-test.js @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import TableDefinition from '../../../utils/table-definition'; +import { module, test } from 'qunit'; + +module('Unit | Utility | table definition'); + +test('Class creation test', function(assert) { + assert.ok(TableDefinition); +}); + +test('Default instance test', function(assert) { + var definition = TableDefinition.create(); + + assert.ok(definition); + + assert.equal(definition.pageNum, 1); + assert.equal(definition.rowCount, 10); + assert.equal(definition.minRowsForFooter, 25); +}); + +test('Page-num reset test', function(assert) { + var definition = TableDefinition.create(); + + assert.equal(definition.pageNum, 1); + + definition.set("pageNum", 5); + assert.equal(definition.pageNum, 5); + + definition.set("searchText", "x"); + assert.equal(definition.pageNum, 1); + + definition.set("pageNum", 5); + definition.set("rowCount", 5); + assert.equal(definition.pageNum, 1); +}); diff --git a/tez-ui/src/main/webapp/yarn.lock b/tez-ui/src/main/webapp/yarn.lock index bf694fb2c3..7df8ea0a57 100644 --- a/tez-ui/src/main/webapp/yarn.lock +++ b/tez-ui/src/main/webapp/yarn.lock @@ -47,10 +47,6 @@ amdefine@>=0.0.4: version "1.0.1" resolved "https://registry.yarnpkg.com/amdefine/-/amdefine-1.0.1.tgz#4a5282ac164729e93619bcfd3ad151f817ce91f5" -ansi-regex@*, ansi-regex@^2.0.0: - version "2.1.1" - resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-2.1.1.tgz#c3b33ab5ee360d86e0e628f0468ae7ef27d654df" - ansi-regex@^0.2.0, ansi-regex@^0.2.1: version "0.2.1" resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-0.2.1.tgz#0d8e946967a3d8143f93e24e298525fc1b2235f9" @@ -59,6 +55,10 @@ ansi-regex@^1.0.0: version "1.1.1" resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-1.1.1.tgz#41c847194646375e6a1a5d10c3ca054ef9fc980d" +ansi-regex@^2.0.0: + version "2.1.1" + resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-2.1.1.tgz#c3b33ab5ee360d86e0e628f0468ae7ef27d654df" + ansi-styles@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-1.1.0.tgz#eaecbf66cd706882760b2f4691582b8f55d7a7de" @@ -187,10 +187,6 @@ ast-types@0.8.12: version "0.8.12" resolved "https://registry.yarnpkg.com/ast-types/-/ast-types-0.8.12.tgz#a0d90e4351bb887716c83fd637ebf818af4adfcc" -ast-types@0.8.15: - version "0.8.15" - resolved "https://registry.yarnpkg.com/ast-types/-/ast-types-0.8.15.tgz#8eef0827f04dff0ec8857ba925abe3fea6194e52" - ast-types@0.9.6: version "0.9.6" resolved "https://registry.yarnpkg.com/ast-types/-/ast-types-0.9.6.tgz#102c9e9e9005d3e7e3829bf0c4fa24ee862ee9b9" @@ -216,16 +212,16 @@ async@0.9.0: version "0.9.0" resolved "https://registry.yarnpkg.com/async/-/async-0.9.0.tgz#ac3613b1da9bed1b47510bb4651b8931e47146c7" +async@2.6.4, async@^2.0.1: + version "2.6.4" + resolved "https://registry.yarnpkg.com/async/-/async-2.6.4.tgz#706b7ff6084664cd7eae713f6f965433b5504221" + dependencies: + lodash "^4.17.14" + async@^1.0.0: version "1.5.2" resolved "https://registry.yarnpkg.com/async/-/async-1.5.2.tgz#ec6a61ae56480c0c3cb241c95618e20892f9672a" -async@^2.0.1: - version "2.3.0" - resolved 
"https://registry.yarnpkg.com/async/-/async-2.3.0.tgz#1013d1051047dd320fe24e494d5c66ecaf6147d9" - dependencies: - lodash "^4.14.0" - async@~0.2.6, async@~0.2.9: version "0.2.10" resolved "https://registry.yarnpkg.com/async/-/async-0.2.10.tgz#b6bbe0b0674b9d719708ca38de8c237cb526c3d1" @@ -477,6 +473,12 @@ boom@2.x.x: dependencies: hoek "2.x.x" +boom@7.x.x: + version "7.3.0" + resolved "https://registry.yarnpkg.com/boom/-/boom-7.3.0.tgz#733a6d956d33b0b1999da3fe6c12996950d017b9" + dependencies: + hoek "6.x.x" + bower-config@0.6.1: version "0.6.1" resolved "https://registry.yarnpkg.com/bower-config/-/bower-config-0.6.1.tgz#7093155688bef44079bf4cb32d189312c87ded60" @@ -500,7 +502,11 @@ bower-shrinkwrap-resolver-ext@^0.1.0: semver "^5.3.0" string.prototype.endswith "^0.2.0" -bower@1.7.7, bower@^1.3.12: +bower@1.8.4: + version "1.8.4" + resolved "https://registry.yarnpkg.com/bower/-/bower-1.8.4.tgz#e7876a076deb8137f7d06525dc5e8c66db82f28a" + +bower@^1.3.12: version "1.7.7" resolved "https://registry.yarnpkg.com/bower/-/bower-1.7.7.tgz#2fd7ff3ebdcba5a8ffcd84c397c8fdfe9f825f92" @@ -1180,11 +1186,11 @@ cross-spawn-async@^2.0.0: lru-cache "^4.0.0" which "^1.2.8" -cryptiles@2.x.x: - version "2.0.5" - resolved "https://registry.yarnpkg.com/cryptiles/-/cryptiles-2.0.5.tgz#3bdfecdc608147c1c67202fa291e7dca59eaa3b8" +cryptiles@2.x.x, cryptiles@4.1.2: + version "4.1.2" + resolved "https://registry.yarnpkg.com/cryptiles/-/cryptiles-4.1.2.tgz#363c9ab5c859da9d2d6fb901b64d980966181184" dependencies: - boom "2.x.x" + boom "7.x.x" ctype@0.5.3: version "0.5.3" @@ -1220,51 +1226,11 @@ date-now@^0.1.4: version "0.1.4" resolved "https://registry.yarnpkg.com/date-now/-/date-now-0.1.4.tgz#eaf439fd4d4848ad74e5cc7dbef200672b9e345b" -debug@0.7.4: - version "0.7.4" - resolved "https://registry.yarnpkg.com/debug/-/debug-0.7.4.tgz#06e1ea8082c2cb14e39806e22e2f6f757f92af39" - -debug@1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/debug/-/debug-1.0.2.tgz#3849591c10cce648476c3c7c2e2e3416db5963c4" - dependencies: - ms "0.6.2" - -debug@1.0.3: - version "1.0.3" - resolved "https://registry.yarnpkg.com/debug/-/debug-1.0.3.tgz#fc8c6b2d6002804b4081c0208e0f6460ba1fa3e4" - dependencies: - ms "0.6.2" - -debug@1.0.4: - version "1.0.4" - resolved "https://registry.yarnpkg.com/debug/-/debug-1.0.4.tgz#5b9c256bd54b6ec02283176fa8a0ede6d154cbf8" +debug@0.7.4, debug@1.0.2, debug@1.0.3, debug@1.0.4, debug@2.1.0, debug@2.6.1, debug@2.6.3, debug@2.6.9, debug@^2.1.0, debug@^2.1.1, debug@^2.1.3, debug@^2.2.0, debug@~2.2.0: + version "2.6.9" + resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" dependencies: - ms "0.6.2" - -debug@2.1.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/debug/-/debug-2.1.0.tgz#33ab915659d8c2cc8a41443d94d6ebd37697ed21" - dependencies: - ms "0.6.2" - -debug@2.6.1: - version "2.6.1" - resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.1.tgz#79855090ba2c4e3115cc7d8769491d58f0491351" - dependencies: - ms "0.7.2" - -debug@2.6.3, debug@^2.1.0, debug@^2.1.1, debug@^2.1.3, debug@^2.2.0: - version "2.6.3" - resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.3.tgz#0f7eb8c30965ec08c72accfa0130c8b79984141d" - dependencies: - ms "0.7.2" - -debug@~2.2.0: - version "2.2.0" - resolved "https://registry.yarnpkg.com/debug/-/debug-2.2.0.tgz#f87057e995b1a1f6ae6a4960664137bc56f039da" - dependencies: - ms "0.7.1" + ms "2.0.0" debuglog@^1.0.1: version "1.0.1" @@ -1391,29 +1357,9 @@ ee-first@1.1.1: version "1.1.1" resolved 
"https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d" -em-helpers@0.8.0: - version "0.8.0" - resolved "https://registry.yarnpkg.com/em-helpers/-/em-helpers-0.8.0.tgz#01678f3692a61d563cce68e49459e206d14db095" - dependencies: - ember-cli-htmlbars "^1.0.1" - ember-cli-less "^1.4.0" - source-map "^0.5.6" - optionalDependencies: - phantomjs-prebuilt "2.1.13" - -em-table@0.7.2: - version "0.7.2" - resolved "https://registry.yarnpkg.com/em-table/-/em-table-0.7.2.tgz#867ff734701df9765f2505e02acd74768edb0f71" - dependencies: - ember-cli-htmlbars "^1.0.1" - ember-cli-less "^1.4.0" - source-map "^0.5.6" - optionalDependencies: - phantomjs-prebuilt "2.1.13" - -em-tgraph@0.0.10: - version "0.0.10" - resolved "https://registry.yarnpkg.com/em-tgraph/-/em-tgraph-0.0.10.tgz#8b82a9a1853e4bfd615ff1a9d810435302b0db29" +em-tgraph@0.0.14: + version "0.0.14" + resolved "https://registry.yarnpkg.com/em-tgraph/-/em-tgraph-0.0.14.tgz#4d48b911760f85dec41904e4056ec52542391cc1" dependencies: ember-cli-htmlbars "^1.0.1" ember-cli-less "^1.4.0" @@ -1759,7 +1705,7 @@ ember-truth-helpers@1.3.0: version "1.3.0" resolved "https://registry.yarnpkg.com/ember-truth-helpers/-/ember-truth-helpers-1.3.0.tgz#6ed9f83ce9a49f52bb416d55e227426339a64c60" dependencies: - ember-cli-babel "^5.1.5" + ember-cli-babel "^5.1.6" ember-wormhole@^0.3.4: version "0.3.6" @@ -2329,7 +2275,7 @@ glob-parent@^2.0.0: minimatch "^2.0.1" once "^1.3.0" -glob@5.0.13, glob@^5.0.10, glob@~5.0.0: +glob@5.0.13, glob@^5.0.10: version "5.0.13" resolved "https://registry.yarnpkg.com/glob/-/glob-5.0.13.tgz#0b6ffc3ac64eb90669f723a00a0ebb7281b33f8f" dependencies: @@ -2339,7 +2285,7 @@ glob@5.0.13, glob@^5.0.10, glob@~5.0.0: once "^1.3.0" path-is-absolute "^1.0.0" -glob@5.x, glob@^5.0.15, glob@~5.0.15: +glob@5.x, glob@^5.0.15, glob@~5.0.0, glob@~5.0.15: version "5.0.15" resolved "https://registry.yarnpkg.com/glob/-/glob-5.0.15.tgz#1bc936b9e02f4a603fcc222ecf7633d30b8b93b1" dependencies: @@ -2492,6 +2438,10 @@ hoek@2.x.x: version "2.16.3" resolved "https://registry.yarnpkg.com/hoek/-/hoek-2.16.3.tgz#20bb7403d3cea398e91dc4710a8ff1b8274a25ed" +hoek@6.x.x: + version "6.1.3" + resolved "https://registry.yarnpkg.com/hoek/-/hoek-6.1.3.tgz#73b7d33952e01fe27a38b0457294b79dd8da242c" + home-or-tmp@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/home-or-tmp/-/home-or-tmp-1.0.0.tgz#4b9f1e40800c3e50c6c27f781676afcce71f3985" @@ -2682,12 +2632,17 @@ is-integer@^1.0.4: dependencies: is-finite "^1.0.0" -is-my-json-valid@^2.12.4: - version "2.16.0" - resolved "https://registry.yarnpkg.com/is-my-json-valid/-/is-my-json-valid-2.16.0.tgz#f079dd9bfdae65ee2038aae8acbc86ab109e3693" +is-my-ip-valid@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/is-my-ip-valid/-/is-my-ip-valid-1.0.1.tgz#f7220d1146257c98672e6fba097a9f3f2d348442" + +is-my-json-valid@2.20.3, is-my-json-valid@^2.12.4: + version "2.20.3" + resolved "https://registry.yarnpkg.com/is-my-json-valid/-/is-my-json-valid-2.20.3.tgz#7e72dfd435b7341bc4ba4caa44ccd5703a8f8e19" dependencies: generate-function "^2.0.0" generate-object-property "^1.1.0" + is-my-ip-valid "^1.0.0" jsonpointer "^4.0.0" xtend "^4.0.0" @@ -2809,9 +2764,9 @@ json-parse-helpfulerror@^1.0.2: dependencies: jju "^1.1.0" -json-schema@0.2.3: - version "0.2.3" - resolved "https://registry.yarnpkg.com/json-schema/-/json-schema-0.2.3.tgz#b480c892e59a2f05954ce727bd3f2a4e882f9e13" +json-schema@0.2.3, json-schema@0.4.0: + version "0.4.0" + resolved 
"https://registry.yarnpkg.com/json-schema/-/json-schema-0.4.0.tgz#f7de4cf6efab838ebaeb3236474cbba5a1930ab5" json-stable-stringify@^1.0.0, json-stable-stringify@^1.0.1: version "1.0.1" @@ -2841,9 +2796,9 @@ jsonify@~0.0.0: version "0.0.0" resolved "https://registry.yarnpkg.com/jsonify/-/jsonify-0.0.0.tgz#2c74b6ee41d93ca51b7b5aaee8f503631d252a73" -jsonpointer@^4.0.0: - version "4.0.1" - resolved "https://registry.yarnpkg.com/jsonpointer/-/jsonpointer-4.0.1.tgz#4fd92cb34e0e9db3c89c8622ecf51f9b978c6cb9" +jsonpointer@4.1.0, jsonpointer@^4.0.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/jsonpointer/-/jsonpointer-4.1.0.tgz#501fb89986a2389765ba09e6053299ceb4f2c2cc" jsprim@^1.2.2: version "1.4.0" @@ -2936,14 +2891,6 @@ lodash-node@^3.4.0: version "3.10.2" resolved "https://registry.yarnpkg.com/lodash-node/-/lodash-node-3.10.2.tgz#2598d5b1b54e6a68b4cb544e5c730953cbf632f7" -lodash._arraycopy@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/lodash._arraycopy/-/lodash._arraycopy-3.0.0.tgz#76e7b7c1f1fb92547374878a562ed06a3e50f6e1" - -lodash._arrayeach@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/lodash._arrayeach/-/lodash._arrayeach-3.0.0.tgz#bab156b2a90d3f1bbd5c653403349e5e5933ef9e" - lodash._baseassign@^3.0.0: version "3.2.0" resolved "https://registry.yarnpkg.com/lodash._baseassign/-/lodash._baseassign-3.2.0.tgz#8c38a099500f215ad09e59f1722fd0c52bfe0a4e" @@ -2964,10 +2911,6 @@ lodash._basecopy@^3.0.0: version "3.0.1" resolved "https://registry.yarnpkg.com/lodash._basecopy/-/lodash._basecopy-3.0.1.tgz#8da0e6a876cf344c0ad8a54882111dd3c5c7ca36" -lodash._basefor@^3.0.0: - version "3.0.3" - resolved "https://registry.yarnpkg.com/lodash._basefor/-/lodash._basefor-3.0.3.tgz#7550b4e9218ef09fad24343b612021c79b4c20c2" - lodash._baseindexof@^3.0.0: version "3.1.0" resolved "https://registry.yarnpkg.com/lodash._baseindexof/-/lodash._baseindexof-3.1.0.tgz#fe52b53a1c6761e42618d654e4a25789ed61822c" @@ -3041,14 +2984,6 @@ lodash.isarray@^3.0.0: version "3.0.4" resolved "https://registry.yarnpkg.com/lodash.isarray/-/lodash.isarray-3.0.4.tgz#79e4eb88c36a8122af86f844aa9bcd851b5fbb55" -lodash.isplainobject@^3.0.0: - version "3.2.0" - resolved "https://registry.yarnpkg.com/lodash.isplainobject/-/lodash.isplainobject-3.2.0.tgz#9a8238ae16b200432960cd7346512d0123fbf4c5" - dependencies: - lodash._basefor "^3.0.0" - lodash.isarguments "^3.0.0" - lodash.keysin "^3.0.0" - lodash.istypedarray@^3.0.0: version "3.0.6" resolved "https://registry.yarnpkg.com/lodash.istypedarray/-/lodash.istypedarray-3.0.6.tgz#c9a477498607501d8e8494d283b87c39281cef62" @@ -3061,32 +2996,9 @@ lodash.keys@^3.0.0: lodash.isarguments "^3.0.0" lodash.isarray "^3.0.0" -lodash.keysin@^3.0.0: - version "3.0.8" - resolved "https://registry.yarnpkg.com/lodash.keysin/-/lodash.keysin-3.0.8.tgz#22c4493ebbedb1427962a54b445b2c8a767fb47f" - dependencies: - lodash.isarguments "^3.0.0" - lodash.isarray "^3.0.0" - -lodash.merge@^3.0.2, lodash.merge@^3.3.2: - version "3.3.2" - resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-3.3.2.tgz#0d90d93ed637b1878437bb3e21601260d7afe994" - dependencies: - lodash._arraycopy "^3.0.0" - lodash._arrayeach "^3.0.0" - lodash._createassigner "^3.0.0" - lodash._getnative "^3.0.0" - lodash.isarguments "^3.0.0" - lodash.isarray "^3.0.0" - lodash.isplainobject "^3.0.0" - lodash.istypedarray "^3.0.0" - lodash.keys "^3.0.0" - lodash.keysin "^3.0.0" - lodash.toplainobject "^3.0.0" - -lodash.merge@^4.5.1: - version "4.6.0" - resolved 
"https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.0.tgz#69884ba144ac33fe699737a6086deffadd0f89c5" +lodash.merge@4.6.2, lodash.merge@^3.0.2, lodash.merge@^3.3.2, lodash.merge@^4.5.1: + version "4.6.2" + resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a" lodash.pad@^4.1.0: version "4.5.1" @@ -3110,13 +3022,6 @@ lodash.restparam@^3.0.0: version "3.6.1" resolved "https://registry.yarnpkg.com/lodash.restparam/-/lodash.restparam-3.6.1.tgz#936a4e309ef330a7645ed4145986c85ae5b20805" -lodash.toplainobject@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/lodash.toplainobject/-/lodash.toplainobject-3.0.0.tgz#28790ad942d293d78aa663a07ecf7f52ca04198d" - dependencies: - lodash._basecopy "^3.0.0" - lodash.keysin "^3.0.0" - lodash.uniq@^3.2.2: version "3.2.2" resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-3.2.2.tgz#146c36f25e75d19501ba402e88ba14937f63cd8b" @@ -3135,9 +3040,9 @@ lodash@^3.10.0, lodash@^3.6.0, lodash@^3.9.3: version "3.10.1" resolved "https://registry.yarnpkg.com/lodash/-/lodash-3.10.1.tgz#5bf45e8e49ba4189e17d482789dfd15bd140b7b6" -lodash@^4.14.0: - version "4.17.4" - resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.4.tgz#78203a4d1c328ae1d86dca6460e369b57f4055ae" +lodash@^4.17.14: + version "4.17.21" + resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" lodash@~2.3.0: version "2.3.0" @@ -3310,9 +3215,9 @@ minimatch@~0.2.9: lru-cache "2" sigmund "~1.0.0" -minimist@0.0.8, minimist@~0.0.1: - version "0.0.8" - resolved "https://registry.yarnpkg.com/minimist/-/minimist-0.0.8.tgz#857fcabfc3397d2625b8228262e86aa7a011b05d" +minimist@0.0.8, minimist@1.2.6, minimist@~0.0.1: + version "1.2.6" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.6.tgz#8637a5b759ea0d6e98702cfb3a9283323c93af44" minimist@^1.1.0, minimist@^1.1.1: version "1.2.0" @@ -3348,11 +3253,11 @@ moment-timezone@^0.3.0: version "0.3.1" resolved "https://registry.yarnpkg.com/moment-timezone/-/moment-timezone-0.3.1.tgz#3ef47856b02d53b718a10a5ec2023aa299e07bf5" dependencies: - moment ">= 2.6.0" + moment ">= 2.29.4" -"moment@>= 2.6.0": - version "2.18.1" - resolved "https://registry.yarnpkg.com/moment/-/moment-2.18.1.tgz#c36193dd3ce1c2eed2adb7c802dbbc77a81b1c0f" +"moment@>= 2.29.4": + version "2.29.4" + resolved "https://registry.yarnpkg.com/moment/-/moment-2.29.4.tgz#3dbe052889fe7c1b2ed966fcb3a77328964ef108" morgan@^1.5.2: version "1.8.1" @@ -3368,18 +3273,14 @@ mout@~0.9.0: version "0.9.1" resolved "https://registry.yarnpkg.com/mout/-/mout-0.9.1.tgz#84f0f3fd6acc7317f63de2affdcc0cee009b0477" -ms@0.6.2: - version "0.6.2" - resolved "https://registry.yarnpkg.com/ms/-/ms-0.6.2.tgz#d89c2124c6fdc1353d65a8b77bf1aac4b193708c" - -ms@0.7.1: - version "0.7.1" - resolved "https://registry.yarnpkg.com/ms/-/ms-0.7.1.tgz#9cd13c03adbff25b65effde7ce864ee952017098" - ms@0.7.2: version "0.7.2" resolved "https://registry.yarnpkg.com/ms/-/ms-0.7.2.tgz#ae25cf2512b3885a1d95d7f037868d8431124765" +ms@2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8" + mustache@^2.0.0: version "2.3.0" resolved "https://registry.yarnpkg.com/mustache/-/mustache-2.3.0.tgz#4028f7778b17708a489930a6e52ac3bca0da41d0" @@ -3881,21 +3782,9 @@ q@^1.1.2: version "1.5.0" resolved "https://registry.yarnpkg.com/q/-/q-1.5.0.tgz#dd01bac9d06d30e6f219aecb8253ee9ebdc308f1" -qs@5.2.0, qs@~5.2.0: - version "5.2.0" - resolved 
"https://registry.yarnpkg.com/qs/-/qs-5.2.0.tgz#a9f31142af468cb72b25b30136ba2456834916be" - -qs@6.4.0: - version "6.4.0" - resolved "https://registry.yarnpkg.com/qs/-/qs-6.4.0.tgz#13e26d28ad6b0ffaa91312cd3bf708ed351e7233" - -qs@~5.1.0: - version "5.1.0" - resolved "https://registry.yarnpkg.com/qs/-/qs-5.1.0.tgz#4d932e5c7ea411cca76a312d39a606200fd50cd9" - -qs@~6.2.0: - version "6.2.3" - resolved "https://registry.yarnpkg.com/qs/-/qs-6.2.3.tgz#1cfcb25c10a9b2b483053ff39f5dfc9233908cfe" +qs@5.2.0, qs@6.2.4, qs@6.4.0, qs@~5.1.0, qs@~5.2.0, qs@~6.2.0: + version "6.2.4" + resolved "https://registry.yarnpkg.com/qs/-/qs-6.2.4.tgz#d90821bb8537cecc140e6c34f54ec76e54b39b22" quick-temp@0.1.3, quick-temp@^0.1.0, quick-temp@^0.1.2, quick-temp@^0.1.3: version "0.1.3" @@ -4020,7 +3909,7 @@ realize-package-specifier@~3.0.1: dezalgo "^1.0.1" npm-package-arg "^4.1.1" -recast@0.10.33: +recast@0.10.33, recast@^0.10.10: version "0.10.33" resolved "https://registry.yarnpkg.com/recast/-/recast-0.10.33.tgz#942808f7aa016f1fa7142c461d7e5704aaa8d697" dependencies: @@ -4029,15 +3918,6 @@ recast@0.10.33: private "~0.1.5" source-map "~0.5.0" -recast@^0.10.10: - version "0.10.43" - resolved "https://registry.yarnpkg.com/recast/-/recast-0.10.43.tgz#b95d50f6d60761a5f6252e15d80678168491ce7f" - dependencies: - ast-types "0.8.15" - esprima-fb "~15001.1001.0-dev-harmony-fb" - private "~0.1.5" - source-map "~0.5.0" - recast@^0.11.17, recast@^0.11.3: version "0.11.23" resolved "https://registry.yarnpkg.com/recast/-/recast-0.11.23.tgz#451fd3004ab1e4df9b4e4b66376b2a21912462d3" @@ -4424,10 +4304,6 @@ spdx-expression-parse@~1.0.0: version "1.0.4" resolved "https://registry.yarnpkg.com/spdx-expression-parse/-/spdx-expression-parse-1.0.4.tgz#9bdf2f20e1f40ed447fbe273266191fced51626c" -spdx-license-ids@*: - version "2.0.1" - resolved "https://registry.yarnpkg.com/spdx-license-ids/-/spdx-license-ids-2.0.1.tgz#02017bcc3534ee4ffef6d58d20e7d3e9a1c3c8ec" - spdx-license-ids@^1.0.0, spdx-license-ids@^1.0.2: version "1.2.2" resolved "https://registry.yarnpkg.com/spdx-license-ids/-/spdx-license-ids-1.2.2.tgz#c9df7a3424594ade6bd11900d596696dc06bac57" @@ -4489,12 +4365,6 @@ stringstream@~0.0.4: version "0.0.5" resolved "https://registry.yarnpkg.com/stringstream/-/stringstream-0.0.5.tgz#4e484cd4de5a0bbbee18e46307710a8a81621878" -strip-ansi@*, strip-ansi@^3.0.0: - version "3.0.1" - resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-3.0.1.tgz#6a385fb8853d952d5ff05d0e8aaf94278dc63dcf" - dependencies: - ansi-regex "^2.0.0" - strip-ansi@^0.3.0: version "0.3.0" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-0.3.0.tgz#25f48ea22ca79187f3174a4db8759347bb126220" @@ -4507,6 +4377,12 @@ strip-ansi@^2.0.1: dependencies: ansi-regex "^1.0.0" +strip-ansi@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-3.0.1.tgz#6a385fb8853d952d5ff05d0e8aaf94278dc63dcf" + dependencies: + ansi-regex "^2.0.0" + strip-ansi@~0.1.0: version "0.1.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-0.1.1.tgz#39e8a98d044d150660abe4a6808acf70bb7bc991"