Skip to content

Commit

Permalink
Merge remote-tracking branch 'gh/wip-ceph-kdump-copy'
Browse files Browse the repository at this point in the history
  • Loading branch information
liewegas committed May 1, 2012
2 parents 18790b1 + 1b2a066 commit 627761f
Show file tree
Hide file tree
Showing 7 changed files with 319 additions and 11 deletions.
1 change: 1 addition & 0 deletions ceph.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ fi
%{_bindir}/boto_tool
%{_bindir}/ceph-coverage
%{_bindir}/obsync
%{_bindir}/ceph-kdump-copy
%{_initrddir}/ceph
%dir %{_libdir}/rados-classes
/sbin/mkcephfs
Expand Down
33 changes: 33 additions & 0 deletions debian/ceph-kdump-copy.default
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# ceph-kdump-copy configuration
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# Remote host information:
#
# These first two MUST be specified
# KDUMP_HOST - The remote host to which kdumps will be copied.
# KDUMP_HOST_USER - The ssh user on KDUMP_HOST that has write
# permission in KDUMP_HOST_COREDIR.
KDUMP_HOST="YOU_MUST_SPECIFY_THIS"
KDUMP_HOST_USER="YOU_MUST_SPECIFY_THIS_TOO"

# KDUMP_HOST_COREDIR - Full path to the directory on KDUMP_HOST that
# will contain copied kdumps. If not set, "/var/crash/remote" is
# the default.
# KDUMP_HOST_COREDIR="/var/crash/remote"

# KDUMP_HOST_MY_ID - Name for "me", used as the directory name
# under KDUMP_HOST_COREDIR under which all kdumps from this
# host are placed. Each kdump is identified by a date stamp.
# If not set, "$(hostname)" is the default.
# KDUMP_HOST_MY_ID="$(hostname)"

# ---------------------------------------------------------------------------
# Local host information:
#
# The local directory in which dumps are saved. If not set, "/var/crash"
# is the default.
# KDUMP_COREDIR="/var/crash"

# ---------------------------------------------------------------------------
# Architecture specific Overrides:
69 changes: 69 additions & 0 deletions debian/ceph-kdump-copy.init
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#! /bin/sh
### BEGIN INIT INFO
# Provides: ceph-kdump-copy
# Required-Start: $kdump
# Required-Stop:
# Default-Start: 2
# Default-Stop: 6
# Short-Description: Copies kdump crash files to remote server
# Description: This file is used to move crash files generated
# by Ubuntu apport via the kdump init script to a
# remote host.
### END INIT INFO

# Author: Alex Elder <[email protected]>

# To install and activate this init script:
# update-rc.d ceph-kdump-copy start 02 2 .
# To deactivate and uninstall this init script:
# update-rc.d -f ceph-kdump-copy remove

# PATH should only include /usr/* if it runs after the mountnfs.sh script
PATH="/sbin:/usr/sbin:/bin:/usr/bin"
DESC="Copies kdump crash files to remote server"
NAME="ceph-kdump-copy"
SCRIPTNAME="/etc/init.d/${NAME}"
CONFIGFILE="/etc/default/${NAME}"

# Print a message on stderr and exit with LSB status 6 ("program is not
# configured").  This script has to define err itself: /bin/sh has no such
# command, and the shipped configuration file only sets variables.
err() {
	echo "${NAME}: $*" >&2
	exit 6
}

# Exit if the copy command is not installed
[ -x "/usr/bin/ceph-kdump-copy" ] || exit 0

# Read configuration variable file if it is present
[ -r "${CONFIGFILE}" ] && . "${CONFIGFILE}"

# Both settings are mandatory.  They are exported because the copy command
# reads them from its environment.
[ -z "${KDUMP_HOST}" ] &&
	err "please specify KDUMP_HOST in '${CONFIGFILE}'"
[ -z "${KDUMP_HOST_USER}" ] &&
	err "please specify KDUMP_HOST_USER in '${CONFIGFILE}'"
export KDUMP_HOST KDUMP_HOST_USER

# Load the VERBOSE setting and other rcS variables
. /lib/init/vars.sh

# Define LSB log_* functions.
# Depend on lsb-base (>= 3.2-14) to ensure that this file is present
# and status_of_proc is working.
. /lib/lsb/init-functions

# Dispatch on the requested action.  Only "start" does any work: it runs
# the copy command once and reports the outcome.
case "$1" in
start)
	[ "$VERBOSE" != no ] && log_action_begin_msg "Copying kdump files"
	if /usr/bin/ceph-kdump-copy; then
		status=0
	else
		status=1
	fi
	# log_action_end_msg is the counterpart of log_action_begin_msg
	[ "$VERBOSE" != no ] && log_action_end_msg "$status"
	# Exit explicitly.  Otherwise the script's status would be that of the
	# VERBOSE test above, which is non-zero on every quiet boot.
	exit "$status"
	;;
stop)	# No-op
	;;
status|reload|force-reload|restart)
	echo "Error: argument '$1' not supported" >&2
	echo "Usage: $SCRIPTNAME {start|stop}" >&2
	exit 3
	;;
*)
	echo "Usage: $SCRIPTNAME {start|stop}" >&2
	exit 3
	;;
esac
1 change: 1 addition & 0 deletions debian/ceph-kdump-copy.install
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
usr/bin/ceph-kdump-copy
14 changes: 14 additions & 0 deletions debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -322,3 +322,17 @@ Description: Python libraries for the Ceph distributed filesystem
.
This package contains Python libraries for interacting with Ceph's
RADOS object storage, and RBD (RADOS block device).

Package: ceph-kdump-copy
Architecture: linux-any
Section: devel
Priority: extra
Depends: ${misc:Depends}, linux-crashdump
Description: Shell script to repackage linux crashdump files generated by
Ubuntu apport, then copy them to a remote system to facilitate
offline analysis. Also includes an init script and associated
configuration file to allow this to happen automatically at boot
time.
.
This package contains an init script and supporting shell script to
copy crashdump files to a remote server.
24 changes: 13 additions & 11 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,11 @@ editpaths = sed \
-e 's|@prefix[@]|$(prefix)|g' \
-e 's|@@GCOV_PREFIX_STRIP[@][@]|$(GCOV_PREFIX_STRIP)|g'

init-ceph mkcephfs ceph-debugpack ceph-coverage: init-ceph.in mkcephfs.in Makefile ceph-debugpack.in ceph-coverage.in
shell_scripts = init-ceph mkcephfs ceph-debugpack ceph-coverage ceph-kdump-copy

$(shell_scripts): Makefile

$(shell_scripts): %: %.in
rm -f $@ $@.tmp
$(editpaths) '$(srcdir)/[email protected]' >$@.tmp
chmod +x $@.tmp
Expand All @@ -821,13 +825,11 @@ bin_SCRIPTS += obsync/obsync
bin_SCRIPTS += obsync/boto_tool
bin_SCRIPTS += ceph-coverage

CLEANFILES += \
ceph-debugpack \
ceph_ver.h \
init-ceph \
mkcephfs \
sample.fetch_config \
ceph-coverage
bin_SCRIPTS += ceph-kdump-copy

CLEANFILES += $(shell_scripts)

CLEANFILES += ceph_ver.h sample.fetch_config

##

Expand Down Expand Up @@ -860,17 +862,17 @@ endif

# extra bits
EXTRA_DIST = $(srcdir)/verify-mds-journal.sh $(srcdir)/vstart.sh $(srcdir)/stop.sh \
ceph-run $(srcdir)/ceph_common.sh $(srcdir)/init-ceph.in $(srcdir)/mkcephfs.in \
ceph-run $(srcdir)/ceph_common.sh \
$(srcdir)/init-radosgw \
$(srcdir)/ceph-debugpack.in \
$(srcdir)/ceph-coverage.in \
$(srcdir)/ceph-clsinfo $(srcdir)/make_version $(srcdir)/check_version \
$(srcdir)/.git_version \
$(srcdir)/ceph-rbdnamer \
$(ceph_tool_gui_DATA) \
$(srcdir)/test/encoding/readable.sh \
$(srcdir)/test/encoding/check-generated.sh

# Distribute the .in template of every generated shell script.  The srcdir
# prefix goes inside the substitution so that it is applied to each word of
# the list rather than only to the first one.
EXTRA_DIST += $(shell_scripts:%=$(srcdir)/%.in)

# work around old versions of automake that don't define $docdir
# NOTE: this won't work on suse, where docdir is /usr/share/doc/packages/$package.
docdir ?= ${datadir}/doc/ceph
Expand Down
188 changes: 188 additions & 0 deletions src/ceph-kdump-copy.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#!/bin/bash -norc

# Copyright (C) 2012 Alex Elder <[email protected]>
#
# This is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 2.1, as published by the Free Software
# Foundation. See file COPYING.

# Name this script was invoked as (directory part stripped); used as the
# prefix of every diagnostic.  Parameter expansion avoids word-splitting
# an unquoted $0.
PROGNAME="${0##*/}"

# Report a fatal problem: write the arguments, joined into one line and
# prefixed with the program name, to stderr, then exit with status 1.
function err() {
	printf '%s\n' "${PROGNAME}: $*" >&2
	exit 1
}

######################

# This script is normally called by @sysconfdir@/init.d/ceph-kdump-copy,
# which will set up these variables based on its config file, found
# in @sysconfdir@/default/ceph-kdump-copy.

# The dump host and the ssh user on it have no sensible default, so
# refuse to run without them.
[ -z "${KDUMP_HOST}" ] &&
	err "KDUMP_HOST must be specified"
[ -z "${KDUMP_HOST_USER}" ] &&
	err "KDUMP_HOST_USER must be specified"

# The local directory in which dumps are saved.
KDUMP_COREDIR="${KDUMP_COREDIR:-/var/crash}"

# Subdirectory on dump host under which my dumps are collected
KDUMP_HOST_MY_ID="${KDUMP_HOST_MY_ID:-$(hostname)}"

# Path on the dump host to the directory in which dumps are copied.
KDUMP_HOST_COREDIR="${KDUMP_HOST_COREDIR:-/var/crash/remote}"

# Full remote path that receives this host's dumps
KDUMP_HOST_MY_COREDIR="${KDUMP_HOST_COREDIR}/${KDUMP_HOST_MY_ID}"

#####################################################################


# Print usage information on stderr and exit.
# Called without arguments, this is a plain help request and the exit
# status is 0.  Called with arguments, they are printed as an error
# message ahead of the usage text and the exit status is 1.
function usage () {
	local rc=0

	{
		echo ""
		if [ $# -gt 0 ]; then
			rc=1
			echo "${PROGNAME}: $@"
			echo ""
		fi
		echo "Usage: ${PROGNAME}"
		echo ""
		echo " each crash_file is the name of a crash file in "
		echo " ${KDUMP_COREDIR} generated by kernel_crashdump"
		echo ""
	} >&2

	exit "${rc}"
}

# Execute the given command line on the dump host over ssh, logging in as
# KDUMP_HOST_USER.  No tty is requested (-T), so stdin can carry data.
# The exit status is that of ssh.
function on_dump_host() {
	local remote="${KDUMP_HOST_USER}@${KDUMP_HOST}"

	ssh -T "${remote}" "$@"
}

# Write a plain-text crash summary to stdout.
# Must run with the unpacked apport directory as the current directory:
# the Uname, Date, Package and DistroRelease files are read from there.
# The crash directory name is taken from CRASH_DIR when that is set, and
# otherwise from the caller's crash_dir variable, so the "crash_dir:"
# line is no longer always empty.
function summarize() {
	local reported_dir="${CRASH_DIR:-${crash_dir:-}}"

	printf '%s\n' \
		"Crash Summary" \
		"-------------" \
		"hostname: $(hostname)" \
		"host arch: $(arch)" \
		"time collected: $(date)" \
		"crash_dir: ${reported_dir}" \
		"" \
		"crash uname: $(cat Uname)" \
		"crash timestamp: $(cat Date)" \
		"kernel package: $(cat Package)" \
		"distribution: $(cat DistroRelease)"
}

# Gather the boot-time files that belong to a crashed kernel so the dump
# directory is self-contained.
#   $1 - release id of the kernel that crashed
#   $2 - directory the copies are placed in
# For each file family below, "<path>-<release>" is copied into the
# directory and the copy is gzip-compressed.  Exits with status 99 when
# not called with exactly two arguments.
function collect_dump_info() {
	if [ $# -ne 2 ]; then
		exit 99
	fi

	local release="$1"
	local dest_dir="$2"
	local prefix src dst
	# The debug vmlinux matching the dump is required for analysis; the
	# remaining entries are other useful files from /boot.
	local -a prefixes=(
		@libdir@/debug/boot/vmlinux
		/boot/System.map
		/boot/vmcoreinfo
		/boot/config
		/boot/abi
	)

	for prefix in "${prefixes[@]}"; do
		src="${prefix}-${release}"
		dst="${dest_dir}/$(basename "${src}")"

		cp "${src}" "${dst}"
		gzip "${dst}"	# Compressing could be optional
	done
}

# Copy a directory containing a kdump and associated files to the dump
# host, then remove the local copy.
#   $1 - name of the crash directory, relative to the current directory
# The directory is streamed as a tar archive to a tar process on the dump
# host.  The local copy is removed only when BOTH ends of the pipe
# succeeded; checking just the pipeline status would miss a failing local
# tar and could delete the only copy of the dump.
# Returns 0 on success, 1 if the transfer failed.  Exits with status 99
# when not called with exactly one argument.
function move_crash_to_repository() {
	[ $# -eq 1 ] || exit 99
	local crash_dir="$1"
	local -a stage_status

	tar cf - "./${crash_dir}" |
		on_dump_host "tar -C '${KDUMP_HOST_MY_COREDIR}' -xf -"
	# PIPESTATUS is overwritten by the next command, so capture it now
	stage_status=( "${PIPESTATUS[@]}" )

	if [ "${stage_status[0]}" -ne 0 ] || [ "${stage_status[1]}" -ne 0 ]; then
		return 1
	fi

	# Removing it should be the default, but optionally skipped
	rm -rf "./${crash_dir}"
}

# Process a single apport-generated crash file.
#   $1 - name of the crash file, relative to the current directory
# The file is unpacked into a scratch directory, the core and a summary
# are repackaged into a date-stamped crash directory, related boot files
# and a README are added, and the result is moved to the dump host.
# The crash file is deleted only after its core has been saved, so a
# failed unpack never destroys the dump.
# Returns 0 on success and non-zero if unpacking, saving the core or the
# transfer failed.  Exits with status 99 when not called with exactly one
# argument.
function process_crash_file() {
	[ $# -eq 1 ] || exit 99
	local crash_file="$1"
	local apport_dir crash_release crash_dir

	# Scratch directory for the unpacked crash file.  Remove any leftover
	# from an interrupted run first so that mkdir cannot fail on it.
	apport_dir="${crash_file}-apport_dir"
	rm -rf -- "${apport_dir}"
	mkdir -- "${apport_dir}" || return 1

	# Unpack the crash file
	if ! apport-unpack "${crash_file}" "${apport_dir}"; then
		rm -rf -- "${apport_dir}"
		return 1
	fi

	# Grab the release id from the kernel that crashed
	crash_release=$(awk '{print $2}' "${apport_dir}/Uname")

	# Create a date-stamped directory in which to hold this crash
	crash_dir=$(date '+%F-%T%z')
	if ! mkdir -- "${crash_dir}"; then
		rm -rf -- "${apport_dir}"
		return 1
	fi

	# Produce a summary; summarize reads its input from the current
	# directory, hence the subshell with its own working directory.
	( cd "${apport_dir}" && summarize ) > "${crash_dir}/summary.txt"
	gzip "${crash_dir}/summary.txt"

	# Save and compress the actual core file
	if ! mv -- "${apport_dir}/VmCore" "${crash_dir}/vmcore-${crash_release}"; then
		rm -rf -- "${apport_dir}" "./${crash_dir}"
		return 1
	fi
	gzip "${crash_dir}/vmcore-${crash_release}"

	# We've got what we need from the crash file
	rm -f -- "${crash_file}"
	rm -rf -- "${apport_dir}"

	# Collect the other related files.  This is best effort: a missing
	# boot file does not prevent the dump from being copied.
	collect_dump_info "${crash_release}" "${crash_dir}"

	# Create a little README file
	{
		echo "To analyze the kernel core dump here:"
		echo " gunzip 'vmcore-${crash_release}'"
		echo " crash 'vmlinux-${crash_release}.gz' \\"
		echo " 'vmcore-${crash_release}'"
		echo ""
		echo "Other files provide additional context."
	} > "${crash_dir}/README"

	# Remove other files we don't have any need for; -f because they
	# may be absent, e.g. when an earlier crash file already removed them.
	rm -f config_link kernel_link system.map_link

	# Finally, copy the crash directory over to the repository
	move_crash_to_repository "${crash_dir}"
}

######### Start #########

# Without a local dump directory there cannot be anything to copy.
[ -d "${KDUMP_COREDIR}" ] || exit 0
cd "${KDUMP_COREDIR}" ||
	err "unable to change to directory '${KDUMP_COREDIR}'"

# Let the shell expand the pattern instead of parsing ls output, so file
# names are never word-split.  nullglob makes "no match" an empty list.
shopt -s nullglob
CRASH_FILES=( linux-image-*.crash )
shopt -u nullglob
[ ${#CRASH_FILES[@]} -eq 0 ] && exit 0	# Quit if there's nothing to do

# Make sure the directory to contain our dumps is there on the dump host
on_dump_host "mkdir -p '${KDUMP_HOST_MY_COREDIR}'" ||
	err "unable to create '${KDUMP_HOST_MY_COREDIR}' on host '${KDUMP_HOST}'"

# Now process each crash file; exit on the first error.
for crash_file in "${CRASH_FILES[@]}"; do
	process_crash_file "${crash_file}" ||
		err "unable to process '${crash_file}'"
done

exit 0

0 comments on commit 627761f

Please sign in to comment.