Skip to content

Commit

Permalink
SAMZA-468; create an integration test suite for samza.
Browse files Browse the repository at this point in the history
  • Loading branch information
Navina Ramesh authored and Chris Riccomini committed Jan 14, 2015
1 parent 0ebfcbd commit 6bbbaa5
Show file tree
Hide file tree
Showing 27 changed files with 949 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ docs/learn/documentation/*/api/javadocs
.DS_Store
out/
*.patch
**.pyc
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ To run key-value performance tests:

./gradlew samza-shell:kvPerformanceTest -PconfigPath=file://$PWD/samza-test/src/main/resources/perf/kv-perf.properties

To run all integration tests:

./bin/integration-tests.sh <dir>

### Job Management

To run a job (defined in a properties file):
Expand Down
86 changes: 86 additions & 0 deletions bin/integration-tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/bin/bash -e
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Runs the Samza integration test suite with Zopkio.
#
#   $1      directory in which to stage and run the tests (created if missing)
#   $2...   optional switches passed straight through to zopkio
#
# Exits non-zero if the arguments are missing, the environment is unusable,
# or any build/install/test step fails.

# The shebang's -e is ignored when the script is started as "bash <script>",
# so enable fail-fast explicitly as well.
set -e

# Directory containing this script, and the repository root above it.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
BASE_DIR="$DIR/.."
TEST_DIR="$1"

if [[ -z "$TEST_DIR" ]]; then
  echo
  echo " USAGE:"
  echo
  echo " ${BASH_SOURCE[0]##*/} \"<dirname to run tests in>\" [zopkio args...]"
  echo
  # A missing required argument is an error; report it to the caller.
  exit 1
fi

# safety check for virtualenv (done before anything is created on disk)
if [[ -f "$HOME/.pydistutils.cfg" ]]; then
  echo "Virtualenv can't run while $HOME/.pydistutils.cfg exists."
  echo "Please remove $HOME/.pydistutils.cfg, and try again."
  exit 1
fi

# Always use an absolute path for ABS_TEST_DIR. The directory is created first
# so that the path resolves correctly even when its parents do not exist yet,
# and it is resolved before changing directory so that a relative argument is
# interpreted against the caller's working directory.
mkdir -p -- "$TEST_DIR"
ABS_TEST_DIR="$( cd -- "$TEST_DIR" && pwd )"
SCRIPTS_DIR="$ABS_TEST_DIR/scripts"

# build integration test tarball (from the repository root, so that the
# script works no matter where it was invoked from)
cd "$BASE_DIR"
./gradlew releaseTestJobs

# populate the integration test directory
rm -rf "$SCRIPTS_DIR"
cp -r samza-test/src/main/python/ "$SCRIPTS_DIR"
cp ./samza-test/build/distributions/samza-test*.tgz "$ABS_TEST_DIR"
cd "$ABS_TEST_DIR"

# Download virtualenv locally if it's not already there. The variable is not
# called VIRTUAL_ENV because bin/activate exports a variable of that name.
VIRTUALENV_PKG=virtualenv-12.0.2
if [[ ! -d "${ABS_TEST_DIR}/${VIRTUALENV_PKG}" ]]; then
  # -f: fail on HTTP errors instead of saving the error page as the tarball
  # -L: follow redirects, -S: still print the error when failing
  curl -fSL -O "https://pypi.python.org/packages/source/v/virtualenv/$VIRTUALENV_PKG.tar.gz"
  tar xvfz "$VIRTUALENV_PKG.tar.gz"
fi

# build the virtual environment, unless one is left over from a previous run
SAMZA_INTEGRATION_TESTS_DIR="$ABS_TEST_DIR/samza-integration-tests"
if [[ ! -d "${SAMZA_INTEGRATION_TESTS_DIR}" ]]; then
  python "$VIRTUALENV_PKG/virtualenv.py" "$SAMZA_INTEGRATION_TESTS_DIR"
fi

# activate the virtual environment
source "$SAMZA_INTEGRATION_TESTS_DIR/bin/activate"

# install zopkio and requests
pip install -r "$SCRIPTS_DIR/requirements.txt"

# Treat all trailing parameters (after dirname) as zopkio switches. An array
# keeps each argument intact, including arguments that contain spaces.
shift
SWITCHES=("$@")

# default to info-level console logging if not specified
if [[ "${SWITCHES[*]}" != *"console-log-level"* ]]; then
  SWITCHES+=(--console-log-level INFO)
fi

# run the tests
zopkio --config-overrides "remote_install_path=$ABS_TEST_DIR" "${SWITCHES[@]}" "$SCRIPTS_DIR/tests.py"

# leave the virtual environment and return to the script's directory
deactivate
cd "$DIR"
19 changes: 19 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ project(":samza-yarn_$scalaVersion") {
compile("org.apache.hadoop:hadoop-common:$yarnVersion") {
exclude module: 'slf4j-log4j12'
exclude module: 'servlet-api'
// Exclude because YARN's 3.4.5 ZK version is incompatible with Kafka's 3.3.4.
exclude module: 'zookeeper'
}
compile("org.scalatra:scalatra_$scalaVersion:$scalatraVersion") {
exclude module: 'scala-compiler'
Expand All @@ -216,6 +218,7 @@ project(":samza-yarn_$scalaVersion") {
exclude module: 'slf4j-api'
}
compile "joda-time:joda-time:$jodaTimeVersion"
compile "org.apache.zookeeper:zookeeper:$zookeeperVersion"
testCompile "junit:junit:$junitVersion"
testCompile "org.mockito:mockito-all:$mockitoVersion"
}
Expand Down Expand Up @@ -362,6 +365,9 @@ project(":samza-test_$scalaVersion") {
compile project(":samza-kv-leveldb_$scalaVersion")
compile project(":samza-kv-rocksdb_$scalaVersion")
compile project(":samza-core_$scalaVersion")
runtime project(":samza-log4j")
runtime project(":samza-yarn_$scalaVersion")
runtime project(":samza-kafka_$scalaVersion")
compile "org.scala-lang:scala-library:$scalaLibVersion"
compile "net.sf.jopt-simple:jopt-simple:$joptSimpleVersion"
compile "javax.mail:mail:1.4"
Expand All @@ -386,4 +392,17 @@ project(":samza-test_$scalaVersion") {
// useful for configuring TestSamzaContainerPerformance from the CLI.
systemProperties = System.properties.findAll { it.key.startsWith("samza") }
}

// Packages the samza-test jobs as a gzipped tarball: job configs under config/,
// samza-shell scripts under bin/, and resources plus all runtime jars and the
// project's own archives under lib/.
task releaseTestJobs(type: Tar, dependsOn: configurations.archives.artifacts) {
  compression = Compression.GZIP

  def shellProject = project(':samza-shell')

  // this project's job configs and resources
  from(file("$projectDir/src/main/config")) {
    into("config/")
  }
  from(file("$projectDir/src/main/resources")) {
    into("lib/")
  }

  // run scripts and resources borrowed from samza-shell
  from(shellProject.file("src/main/bash")) {
    into("bin/")
  }
  from(shellProject.file("src/main/resources")) {
    into("lib/")
  }
  from(shellProject.file("src/main/resources/log4j-console.xml")) {
    into("bin/")
  }

  // legal files go in the tarball root
  from('../LICENSE', '../NOTICE')

  // runtime dependencies and the jars built by this project
  from(configurations.runtime) {
    into("lib/")
  }
  from(configurations.archives.artifacts.files) {
    into("lib/")
  }
}
}
46 changes: 45 additions & 1 deletion docs/contribute/tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ title: Tests
limitations under the License.
-->

Samza's unit tests are written on top of [JUnit](http://junit.org/), and license checking is done with [Apache Rat](http://creadur.apache.org/rat/). An extensive integration test suite is not currently available. This is being actively worked on in [SAMZA-6](https://issues.apache.org/jira/browse/SAMZA-6) and [SAMZA-14](https://issues.apache.org/jira/browse/SAMZA-14).
Samza's unit tests are written on top of [JUnit](http://junit.org/), and license checking is done with [Apache Rat](http://creadur.apache.org/rat/). Samza's integration tests are written on top of [Zopkio](https://github.com/linkedin/Zopkio).

### Running Unit Tests Locally

Expand Down Expand Up @@ -67,3 +67,47 @@ On Mac, check-all.sh will default to the appropriate path for each environment v
[Travis CI](https://travis-ci.org/apache/incubator-samza) has been configured to run Samza's unit tests after every commit to Samza's [master branch](https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree). The test results are mailed to the [developer mailing list](/community/mailing-lists.html), and posted in the [IRC channel](/community/irc.html).

[![Build Status](https://travis-ci.org/apache/incubator-samza.svg?branch=master)](https://travis-ci.org/apache/incubator-samza)

### Running Integration Tests Locally

Samza uses [Zopkio](https://github.com/linkedin/Zopkio) to deploy and execute its integration tests. Integration tests can be executed by running:

./bin/integration-tests.sh /tmp/samza-tests

The parameter defines where the integration tests should install packages both locally and on remote systems. Executing this command will:

1. Build a samza-test job tarball.
2. Download and install YARN, Kafka, and ZooKeeper.
3. Deploy the samza-test job tarball to all YARN NM machines.
4. Start all Samza integration test jobs.
5. Feed input data to the jobs, and verify the results.
6. Open a report, and aggregate all remote logs.

The default configurations that ship with Samza deploy all software, and run all tests locally on the machine from which the `integration-tests.sh` command was executed.

The integration tests use SSH to interact with remote machines (and localhost). This means that you need an authentication mechanism when connecting to the machines. The two authentication mechanisms provided are:

1. Interactive
2. Public key

#### Interactive

Zopkio will prompt you for a password by default. This password will be used as the SSH password when trying to log into remote systems.

#### Public Key

Zopkio supports public key authentication if you prefer to use it, or if your environment doesn't allow interactive authentication. To use public key authentication, add your public SSH key to ~/.ssh/authorized\_keys on each of the machines that you'll be deploying to (localhost by default), and then SSH to each of them once to confirm that you can log in without being prompted for a password. See [here](http://www.linuxproblem.org/art_9.html) for details.

Once this is done, you can run Zopkio with the \-\-nopassword parameter:

./bin/integration-tests.sh /tmp/samza-tests --nopassword

This will skip the password prompt, and force Zopkio to try public key authentication.

#### Console Logging

The integration-tests.sh script will set the console log level to INFO by default. The level can be changed with:

./bin/integration-tests.sh /tmp/samza-tests --console-log-level DEBUG

Changing this setting will define how verbose Zopkio is during test execution. It does not affect any of the log4j.xml settings in Samza, YARN, Kafka, or ZooKeeper.
55 changes: 55 additions & 0 deletions samza-test/src/main/config/negate-number.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Job: submitted through the YARN job factory under the name samza-negate-number.
job.factory.class=org.apache.samza.job.yarn.YarnJobFactory
job.name=samza-negate-number

# YARN: a single container with 1024 MB of memory.
yarn.container.count=1
yarn.container.memory.mb=1024

# Task: runs NegateNumberTask against the Kafka topic samza-test-topic, with
# checkpoints stored in Kafka (replication factor 1).
task.class=org.apache.samza.test.integration.NegateNumberTask
task.inputs=kafka.samza-test-topic
task.checkpoint.factory=org.apache.samza.checkpoint.kafka.KafkaCheckpointManagerFactory
task.checkpoint.replication.factor=1
task.checkpoint.system=kafka
# NOTE(review): the two "generator" lifecycle-listener keys below name a
# com.linkedin.* class and a fabric value, and nothing else in this file refers
# to a listener called "generator" -- presumably leftovers from another
# environment; verify whether they can be removed.
task.lifecycle.listener.generator.class=com.linkedin.samza.task.GeneratorLifecycleListenerFactory
task.lifecycle.listener.generator.fabric=CORP-EAT1
# NOTE(review): the 6g heap ceiling is larger than yarn.container.memory.mb
# (1024) above -- confirm this is intended.
task.opts=-Xmx6g
task.command.class=org.apache.samza.job.ShellCommandBuilder

# Serializers: registers the serde name "string" used by the Kafka system below.
serializers.registry.string.class=org.apache.samza.serializers.StringSerdeFactory

# Kafka System: string keys and messages, consumption starting from the oldest
# offset by default, ZooKeeper and broker both on localhost.
systems.kafka.samza.factory=org.apache.samza.system.kafka.KafkaSystemFactory
systems.kafka.samza.msg.serde=string
systems.kafka.samza.key.serde=string
systems.kafka.samza.offset.default=oldest
systems.kafka.consumer.zookeeper.connect=localhost:2181/
systems.kafka.producer.compression.codec=gzip
systems.kafka.producer.metadata.broker.list=localhost:9092
systems.kafka.producer.request.required.acks=1
systems.kafka.producer.topic.metadata.refresh.interval.ms=86400000
systems.kafka.producer.producer.type=sync
# Normally, we'd set this much higher, but we want things to look snappy in the demo.
systems.kafka.producer.batch.num.messages=1

# negate-number
# NOTE(review): unlike every other Kafka setting in this file, this key is not
# under the systems.kafka.* prefix -- confirm that the job actually reads it.
streams.samza-test-topic.consumer.reset.offset=true
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.samza.test.integration;

import org.apache.samza.system.IncomingMessageEnvelope;
import org.apache.samza.system.OutgoingMessageEnvelope;
import org.apache.samza.system.SystemStream;
import org.apache.samza.task.MessageCollector;
import org.apache.samza.task.StreamTask;
import org.apache.samza.task.TaskCoordinator;

/*
* A simple test job that reads strings, converts them to integers, multiplies
* by -1, and outputs to "samza-test-topic-output" stream.
*/
public class NegateNumberTask implements StreamTask {
public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) {
String input = (String) envelope.getMessage();
Integer number = Integer.valueOf(input);
Integer output = number.intValue() * -1;
collector.send(new OutgoingMessageEnvelope(new SystemStream("kafka", "samza-test-topic-output"), output.toString()));
}
}
5 changes: 5 additions & 0 deletions samza-test/src/main/python/configs/downloads.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
  "url_kafka": "https://archive.apache.org/dist/kafka/0.8.1.1/kafka_2.9.2-0.8.1.1.tgz",
  "url_zookeeper": "https://archive.apache.org/dist/zookeeper/zookeeper-3.4.3/zookeeper-3.4.3.tar.gz",
  "url_hadoop": "https://archive.apache.org/dist/hadoop/common/hadoop-2.4.0/hadoop-2.4.0.tar.gz"
}
23 changes: 23 additions & 0 deletions samza-test/src/main/python/configs/kafka.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
  "kafka_hosts": {
    "kafka_instance_0": "localhost"
  },
  "kafka_port": 9092,
  "kafka_start_cmd": "kafka_2.9.2-0.8.1.1/bin/kafka-server-start.sh -daemon kafka_2.9.2-0.8.1.1/config/server.properties",
  "kafka_stop_cmd": "kafka_2.9.2-0.8.1.1/bin/kafka-server-stop.sh",
  "kafka_install_path": "deploy/kafka",
  "kafka_executable": "kafka_2.9.2-0.8.1.1.tgz",
  "kafka_post_install_cmds": [
    "sed -i.bak 's/SIGINT/SIGTERM/g' kafka_2.9.2-0.8.1.1/bin/kafka-server-stop.sh",
    "sed -i.bak 's/^num\\.partitions *=.*/num.partitions=1/' kafka_2.9.2-0.8.1.1/config/server.properties",
    "sed -i.bak 's/.*log.dirs.*/log.dirs=data/g' kafka_2.9.2-0.8.1.1/config/server.properties"
  ],
  "kafka_logs": [
    "log-cleaner.log",
    "kafka_2.9.2-0.8.1.1/logs/controller.log",
    "kafka_2.9.2-0.8.1.1/logs/kafka-request.log",
    "kafka_2.9.2-0.8.1.1/logs/kafkaServer-gc.log",
    "kafka_2.9.2-0.8.1.1/logs/server.log",
    "kafka_2.9.2-0.8.1.1/logs/state-change.log"
  ]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
  "samza_executable": "samza-test_2.10-0.9.0-SNAPSHOT.tgz",
  "samza_install_path": "deploy/smoke_tests",
  "samza_config_factory": "org.apache.samza.config.factories.PropertiesConfigFactory",
  "samza_config_file": "config/negate-number.properties"
}
38 changes: 38 additions & 0 deletions samza-test/src/main/python/configs/yarn.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
  "yarn_site_template": "scripts/templates/yarn-site.xml",
  "yarn_rm_hosts": {
    "yarn_rm_instance_0": "localhost"
  },
  "yarn_rm_start_cmd": "hadoop-2.4.0/sbin/yarn-daemon.sh start resourcemanager",
  "yarn_rm_stop_cmd": "hadoop-2.4.0/sbin/yarn-daemon.sh stop resourcemanager",
  "yarn_rm_install_path": "deploy/yarn_rm",
  "yarn_rm_post_install_cmds": [
    "sed -i.bak '/<configuration>/a <property><name>yarn.nodemanager.vmem-pmem-ratio</name><value>10</value></property>' hadoop-2.4.0/etc/hadoop/yarn-site.xml",
    "mkdir -p hadoop-2.4.0/conf",
    "chmod 755 hadoop-2.4.0/conf",
    "cp hadoop-2.4.0/etc/hadoop/yarn-site.xml hadoop-2.4.0/conf/yarn-site.xml"
  ],
  "yarn_rm_executable": "hadoop-2.4.0.tar.gz",
  "yarn_rm_logs": ["hadoop-2.4.0/logs"],
  "yarn_nm_hosts": {
    "yarn_nm_instance_0": "localhost"
  },
  "yarn_nm_start_cmd": "hadoop-2.4.0/sbin/yarn-daemon.sh start nodemanager",
  "yarn_nm_stop_cmd": "hadoop-2.4.0/sbin/yarn-daemon.sh stop nodemanager",
  "yarn_nm_install_path": "deploy/yarn_nm",
  "yarn_nm_post_install_cmds": [
    "sed -i.bak '/<configuration>/a <property><name>yarn.nodemanager.vmem-pmem-ratio</name><value>10</value></property>' hadoop-2.4.0/etc/hadoop/yarn-site.xml",
    "mkdir -p hadoop-2.4.0/conf",
    "chmod 755 hadoop-2.4.0/conf",
    "cp hadoop-2.4.0/etc/hadoop/yarn-site.xml hadoop-2.4.0/conf/yarn-site.xml"
  ],
  "yarn_nm_executable": "hadoop-2.4.0.tar.gz",
  "yarn_nm_logs": ["hadoop-2.4.0/logs"],
  "yarn_driver_configs": {
    "yarn.resourcemanager.hostname": "localhost"
  }
}
16 changes: 16 additions & 0 deletions samza-test/src/main/python/configs/zookeeper.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
  "zookeeper_hosts": {
    "zookeeper_instance_0": "localhost"
  },
  "zookeeper_start_cmd": "zookeeper-3.4.3/bin/zkServer.sh start",
  "zookeeper_stop_cmd": "zookeeper-3.4.3/bin/zkServer.sh stop",
  "zookeeper_install_path": "deploy/zookeeper",
  "zookeeper_executable": "zookeeper-3.4.3.tar.gz",
  "zookeeper_post_install_cmds": [
    "cp zookeeper-3.4.3/conf/zoo_sample.cfg zookeeper-3.4.3/conf/zoo.cfg",
    "sed -i.bak 's/.*dataDir=.*/dataDir=data/g' zookeeper-3.4.3/conf/zoo.cfg"
  ],
  "zookeeper_logs": ["zookeeper.out"]
}
Loading

0 comments on commit 6bbbaa5

Please sign in to comment.