Skip to content

Commit

Permalink
qa: hadoop plays nice with new teuthology task
Browse files Browse the repository at this point in the history
This brings the hadoop wordcount up-to-date with the new teuthology
hadoop task.

Signed-off-by: Noah Watkins <[email protected]>
  • Loading branch information
dotnwat committed Feb 12, 2015
1 parent 81b8c37 commit de6b53a
Showing 1 changed file with 23 additions and 34 deletions.
57 changes: 23 additions & 34 deletions qa/workunits/hadoop/wordcount.sh
Original file line number Diff line number Diff line change
@@ -1,48 +1,37 @@
#!/bin/sh -ex
#!/bin/bash
# Hadoop wordcount workunit: loads a sample data set into the Hadoop
# filesystem, runs the MapReduce "wordcount" example over it, and removes the
# input/output paths afterwards.
#
# NOTE(review): this listing comes from a commit diff view (23 additions,
# 34 deletions) whose +/- markers are not shown, so removed and added lines
# appear interleaved: there are two shebangs, `set -e`/`set -x` occur twice,
# HADOOP_PREFIX gets two different defaults, and two complete wordcount flows
# (one on /wc_*, one on /wordcount_*) follow each other. Confirm against the
# repository which lines belong to the current script before running this.

# Abort on the first failing command and trace every command as it runs.
set -e
set -x

# Hadoop filesystem paths used as job input/output by the /wc_* flow below.
WC_INPUT=/wc_input
WC_OUTPUT=/wc_output
# Local scratch directory the downloaded data set is unpacked into.
DATA_INPUT=$(mktemp -d)

echo "starting hadoop-wordcount test"

# bail if $TESTDIR is not set as this test will fail in that scenario
# NOTE(review): $TESTDIR is unquoted here; a value containing whitespace
# would make the `[` test itself error out.
[ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; }

# if HADOOP_PREFIX is not set, use default
[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/apache_hadoop; }

# if HADOOP_MR_HOME is not set, use default
# (HADOOP_MR_HOME is only read by the /wordcount_* "Execute job" step below.)
[ -z $HADOOP_MR_HOME ] && { HADOOP_MR_HOME=$TESTDIR/apache_hadoop/build; }
# NOTE(review): as listed, HADOOP_PREFIX has already been defaulted above, so
# this second default ($TESTDIR/hadoop) can never take effect; presumably it
# replaces the apache_hadoop default in the post-commit script — confirm.
[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; }

export JAVA_HOME=/usr/lib/jvm/default-java

set -e
set -x
# Nuke hadoop directories
# `|| true` keeps `set -e` from aborting when the removal fails (presumably
# because the paths do not exist yet on a first run).
$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true

# Fetch and import testing data set
# The tarball is streamed from curl straight into tar; pipefail is not set, so
# only tar's exit status is seen by `set -e`.
curl http://ceph.com/qa/hadoop_input_files.tar | tar xf - -C $DATA_INPUT
$HADOOP_PREFIX/bin/hadoop fs -copyFromLocal $DATA_INPUT $WC_INPUT
# The local copy is no longer needed once it has been imported.
rm -rf $DATA_INPUT

# Run the job
# The examples jar is located with a glob under $HADOOP_PREFIX/share.
$HADOOP_PREFIX/bin/hadoop jar \
$HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \
wordcount $WC_INPUT $WC_OUTPUT

# NOTE(review): everything from here down to the second "Clean up" works on
# /wordcount_input, /wordcount_output and $TESTDIR/hadoop_input instead of the
# WC_* variables, and looks like the pre-commit flow — confirm.
# Clear out in case there was a previous run (idempotency)
if $HADOOP_PREFIX/bin/hadoop fs -ls /wordcount_output 2>/dev/null ; then
$HADOOP_PREFIX/bin/hadoop fs -rmr /wordcount_output
fi
if $HADOOP_PREFIX/bin/hadoop fs -ls /wordcount_input 2>/dev/null ; then
$HADOOP_PREFIX/bin/hadoop fs -rmr /wordcount_input
fi
rm -rf $TESTDIR/hadoop_input

# Load input files into local filesystem
# Downloads the same tarball with wget and unpacks it inside
# $TESTDIR/hadoop_input (note the `cd`, which changes the working directory
# for the following commands).
mkdir -p $TESTDIR/hadoop_input
wget http://ceph.com/qa/hadoop_input_files.tar -O $TESTDIR/hadoop_input/files.tar
cd $TESTDIR/hadoop_input
tar -xf $TESTDIR/hadoop_input/files.tar

# Load input files into hadoop filesystem
# Only files matching *txt in the unpacked directory are uploaded.
$HADOOP_PREFIX/bin/hadoop fs -mkdir /wordcount_input
$HADOOP_PREFIX/bin/hadoop fs -put $TESTDIR/hadoop_input/*txt /wordcount_input/

# Execute job
# Uses the examples jar found by glob under $HADOOP_MR_HOME.
$HADOOP_PREFIX/bin/hadoop jar $HADOOP_MR_HOME/hadoop*examples*jar wordcount /wordcount_input /wordcount_output

# Clean up
# Removes the /wordcount_* paths and the local input directory; `cd` back to
# $TESTDIR first so the directory being deleted is not the working directory.
$HADOOP_PREFIX/bin/hadoop fs -rmr /wordcount_output
$HADOOP_PREFIX/bin/hadoop fs -rmr /wordcount_input
cd $TESTDIR
rm -rf $TESTDIR/hadoop_input
# Cleanup
# Best-effort removal of the /wc_* paths; failure is ignored via `|| true`.
$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true

echo "completed hadoop-wordcount test"
exit 0

0 comments on commit de6b53a

Please sign in to comment.