forked from apache/kafka
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
KAFKA-5281; System tests for transactions
Author: Apurva Mehta <[email protected]> Reviewers: Jason Gustafson <[email protected]> Closes apache#3149 from apurvam/KAFKA-5281-transactions-system-tests
- Loading branch information
Showing
10 changed files
with
706 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
183 changes: 183 additions & 0 deletions
183
tests/kafkatest/services/transactional_message_copier.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one or more | ||
# contributor license agreements. See the NOTICE file distributed with | ||
# this work for additional information regarding copyright ownership. | ||
# The ASF licenses this file to You under the Apache License, Version 2.0 | ||
# (the "License"); you may not use this file except in compliance with | ||
# the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import os | ||
import json | ||
import signal | ||
|
||
from ducktape.utils.util import wait_until | ||
from ducktape.services.background_thread import BackgroundThreadService | ||
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin | ||
from ducktape.cluster.remoteaccount import RemoteCommandError | ||
|
||
class TransactionalMessageCopier(KafkaPathResolverMixin, BackgroundThreadService): | ||
"""This service wraps org.apache.kafka.tools.TransactionalMessageCopier for | ||
use in system testing. | ||
""" | ||
PERSISTENT_ROOT = "/mnt/transactional_message_copier" | ||
STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "transactional_message_copier.stdout") | ||
STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "transactional_message_copier.stderr") | ||
LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs") | ||
LOG_FILE = os.path.join(LOG_DIR, "transactional_message_copier.log") | ||
LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties") | ||
|
||
logs = { | ||
"transactional_message_copier_stdout": { | ||
"path": STDOUT_CAPTURE, | ||
"collect_default": True}, | ||
"transactional_message_copier_stderr": { | ||
"path": STDERR_CAPTURE, | ||
"collect_default": True}, | ||
"transactional_message_copier_log": { | ||
"path": LOG_FILE, | ||
"collect_default": True} | ||
} | ||
|
||
def __init__(self, context, num_nodes, kafka, transactional_id, consumer_group, | ||
input_topic, input_partition, output_topic, max_messages = -1, | ||
transaction_size = 1000, log_level="INFO"): | ||
super(TransactionalMessageCopier, self).__init__(context, num_nodes) | ||
self.log_level = log_level | ||
self.kafka = kafka | ||
self.transactional_id = transactional_id | ||
self.consumer_group = consumer_group | ||
self.transaction_size = transaction_size | ||
self.input_topic = input_topic | ||
self.input_partition = input_partition | ||
self.output_topic = output_topic | ||
self.max_messages = max_messages | ||
self.message_copy_finished = False | ||
self.consumed = -1 | ||
self.remaining = -1 | ||
self.stop_timeout_sec = 60 | ||
|
||
def _worker(self, idx, node): | ||
node.account.ssh("mkdir -p %s" % TransactionalMessageCopier.PERSISTENT_ROOT, | ||
allow_fail=False) | ||
# Create and upload log properties | ||
log_config = self.render('tools_log4j.properties', | ||
log_file=TransactionalMessageCopier.LOG_FILE) | ||
node.account.create_file(TransactionalMessageCopier.LOG4J_CONFIG, log_config) | ||
# Configure security | ||
self.security_config = self.kafka.security_config.client_config(node=node) | ||
self.security_config.setup_node(node) | ||
cmd = self.start_cmd(node, idx) | ||
self.logger.debug("TransactionalMessageCopier %d command: %s" % (idx, cmd)) | ||
try: | ||
for line in node.account.ssh_capture(cmd): | ||
line = line.strip() | ||
data = self.try_parse_json(line) | ||
if data is not None: | ||
with self.lock: | ||
self.remaining = int(data["remaining"]) | ||
self.consumed = int(data["consumed"]) | ||
self.logger.info("%s: consumed %d, remaining %d" % | ||
(self.transactional_id, self.consumed, self.remaining)) | ||
if "shutdown_complete" in data: | ||
if self.remaining == 0: | ||
# We are only finished if the remaining | ||
# messages at the time of shutdown is 0. | ||
# | ||
# Otherwise a clean shutdown would still print | ||
# a 'shutdown complete' messages even though | ||
# there are unprocessed messages, causing | ||
# tests to fail. | ||
self.logger.info("%s : Finished message copy" % self.transactional_id) | ||
self.message_copy_finished = True | ||
else: | ||
self.logger.info("%s : Shut down without finishing message copy." %\ | ||
self.transactional_id) | ||
except RemoteCommandError as e: | ||
self.logger.debug("Got exception while reading output from copier, \ | ||
probably because it was SIGKILL'd (exit code 137): %s" % str(e)) | ||
|
||
def start_cmd(self, node, idx): | ||
cmd = "export LOG_DIR=%s;" % TransactionalMessageCopier.LOG_DIR | ||
cmd += " export KAFKA_OPTS=%s;" % self.security_config.kafka_opts | ||
cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % TransactionalMessageCopier.LOG4J_CONFIG | ||
cmd += self.path.script("kafka-run-class.sh", node) + " org.apache.kafka.tools." + "TransactionalMessageCopier" | ||
cmd += " --broker-list %s" % self.kafka.bootstrap_servers(self.security_config.security_protocol) | ||
cmd += " --transactional-id %s" % self.transactional_id | ||
cmd += " --consumer-group %s" % self.consumer_group | ||
cmd += " --input-topic %s" % self.input_topic | ||
cmd += " --output-topic %s" % self.output_topic | ||
cmd += " --input-partition %s" % str(self.input_partition) | ||
cmd += " --transaction-size %s" % str(self.transaction_size) | ||
if self.max_messages > 0: | ||
cmd += " --max-messages %s" % str(self.max_messages) | ||
cmd += " 2>> %s | tee -a %s &" % (TransactionalMessageCopier.STDERR_CAPTURE, TransactionalMessageCopier.STDOUT_CAPTURE) | ||
|
||
return cmd | ||
|
||
def clean_node(self, node, clean_shutdown=True): | ||
self.kill_node(node, clean_shutdown) | ||
node.account.ssh("rm -rf " + self.PERSISTENT_ROOT, allow_fail=False) | ||
self.security_config.clean_node(node) | ||
|
||
def pids(self, node): | ||
try: | ||
cmd = "ps ax | grep -i TransactionalMessageCopier | grep java | grep -v grep | awk '{print $1}'" | ||
pid_arr = [pid for pid in node.account.ssh_capture(cmd, allow_fail=True, callback=int)] | ||
return pid_arr | ||
except (RemoteCommandError, ValueError) as e: | ||
self.logger.error("Could not list pids: %s" % str(e)) | ||
return [] | ||
|
||
def alive(self, node): | ||
return len(self.pids(node)) > 0 | ||
|
||
def kill_node(self, node, clean_shutdown=True): | ||
pids = self.pids(node) | ||
sig = signal.SIGTERM if clean_shutdown else signal.SIGKILL | ||
for pid in pids: | ||
node.account.signal(pid, sig) | ||
wait_until(lambda: len(self.pids(node)) == 0, timeout_sec=60, err_msg="Message Copier failed to stop") | ||
|
||
def stop_node(self, node, clean_shutdown=True): | ||
self.kill_node(node, clean_shutdown) | ||
stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec) | ||
assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \ | ||
(str(node.account), str(self.stop_timeout_sec)) | ||
|
||
def restart(self, clean_shutdown): | ||
if self.is_done: | ||
return | ||
node = self.nodes[0] | ||
with self.lock: | ||
self.consumed = -1 | ||
self.remaining = -1 | ||
self.stop_node(node, clean_shutdown) | ||
self.start_node(node) | ||
|
||
def try_parse_json(self, string): | ||
"""Try to parse a string as json. Return None if not parseable.""" | ||
try: | ||
record = json.loads(string) | ||
return record | ||
except ValueError: | ||
self.logger.debug("Could not parse as json: %s" % str(string)) | ||
return None | ||
|
||
@property | ||
def is_done(self): | ||
return self.message_copy_finished | ||
|
||
def progress_percent(self): | ||
with self.lock: | ||
if self.remaining < 0: | ||
return 0 | ||
if self.consumed + self.remaining == 0: | ||
return 100 | ||
return (float(self.consumed)/float(self.consumed + self.remaining)) * 100 |
Oops, something went wrong.