Skip to content

Commit

Permalink
add ceph-crash service
Browse files Browse the repository at this point in the history
ceph-crash runs under systemd and watches /var/lib/ceph/crash
for crash dumps, posting each one to the mgr through the mgr's
crash plugin.

Signed-off-by: Dan Mick <[email protected]>
  • Loading branch information
dmick committed Aug 9, 2018
1 parent 34e2853 commit da20184
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 6 deletions.
17 changes: 11 additions & 6 deletions ceph.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -1041,13 +1041,14 @@ mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mon
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/osd
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mds
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mgr
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/crash
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/crash/posted
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/radosgw
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-osd
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-mds
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rgw
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-mgr
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rbd
mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/crash

%if 0%{?suse_version}
# create __pycache__ directories and their contents
Expand All @@ -1063,6 +1064,7 @@ rm -rf %{buildroot}
%files

%files base
%{_bindir}/ceph-crash
%{_bindir}/crushtool
%{_bindir}/monmaptool
%{_bindir}/osdmaptool
Expand All @@ -1079,6 +1081,7 @@ rm -rf %{buildroot}
%{_libdir}/ceph/erasure-code/libec_*.so*
%dir %{_libdir}/ceph/compressor
%{_libdir}/ceph/compressor/libceph_*.so*
%{_unitdir}/ceph-crash.service
%ifarch x86_64
%dir %{_libdir}/ceph/crypto
%{_libdir}/ceph/crypto/libceph_*.so*
Expand Down Expand Up @@ -1114,6 +1117,8 @@ rm -rf %{buildroot}
%{_mandir}/man8/monmaptool.8*
%{_mandir}/man8/ceph-kvstore-tool.8*
#set up placeholder directories
%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/crash
%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/crash/posted
%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds
Expand All @@ -1126,22 +1131,22 @@ rm -rf %{buildroot}
%if 0%{?suse_version}
%fillup_only
if [ $1 -eq 1 ] ; then
/usr/bin/systemctl preset ceph.target >/dev/null 2>&1 || :
/usr/bin/systemctl preset ceph.target ceph-crash.service >/dev/null 2>&1 || :
fi
%endif
%if 0%{?fedora} || 0%{?rhel}
%systemd_post ceph.target
%systemd_post ceph.target ceph-crash.service
%endif
if [ $1 -eq 1 ] ; then
/usr/bin/systemctl start ceph.target >/dev/null 2>&1 || :
/usr/bin/systemctl start ceph.target ceph-crash.service >/dev/null 2>&1 || :
fi

%preun base
%if 0%{?suse_version}
%service_del_preun ceph.target
%service_del_preun ceph.target ceph-crash.service
%endif
%if 0%{?fedora} || 0%{?rhel}
%systemd_preun ceph.target
%systemd_preun ceph.target ceph-crash.service
%endif

%postun base
Expand Down
1 change: 1 addition & 0 deletions debian/ceph-base.dirs
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ var/lib/ceph/bootstrap-rgw
var/lib/ceph/bootstrap-rbd
var/lib/ceph/tmp
var/lib/ceph/crash
var/lib/ceph/crash/posted
2 changes: 2 additions & 0 deletions debian/ceph-base.install
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
etc/init.d/ceph
lib/systemd/system/ceph-crash.service
usr/bin/ceph-crash
usr/bin/ceph-debugpack
usr/bin/ceph-run
usr/bin/crushtool
Expand Down
4 changes: 4 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,9 @@ configure_file(${CMAKE_SOURCE_DIR}/src/init-ceph.in
configure_file(ceph-post-file.in
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-post-file @ONLY)

# Generate the ceph-crash script from its template. @ONLY limits substitution
# to @VAR@ references (the template's shebang uses @PYTHON_EXECUTABLE@), so
# any ${...} text in the script is left untouched.
configure_file(ceph-crash.in
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-crash @ONLY)

if(WITH_TESTS)
install(PROGRAMS
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-debugpack
Expand All @@ -591,6 +594,7 @@ endif()
# Install the command-line helper scripts into the relative "bin" destination:
# the ones generated into CMAKE_RUNTIME_OUTPUT_DIRECTORY (ceph, ceph-post-file,
# ceph-crash) and the ones taken directly from the source tree.
install(PROGRAMS
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-post-file
${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-crash
${CMAKE_SOURCE_DIR}/src/ceph-run
${CMAKE_SOURCE_DIR}/src/ceph-clsinfo
DESTINATION bin)
Expand Down
83 changes: 83 additions & 0 deletions src/ceph-crash.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!@PYTHON_EXECUTABLE@
# -*- mode:python -*-
# vim: ts=4 sw=4 smarttab expandtab

import argparse
import logging
import os
import subprocess
import sys
import time

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


def parse_args():
    """Parse the command line and return the resulting namespace.

    Options:
      -p/--path   base directory scanned for crash dumps
                  (default /var/lib/ceph/crash)
      -d/--delay  minutes to wait between scans, as a float (default 10.0);
                  0 means scan once and exit
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-p', '--path',
        default='/var/lib/ceph/crash',
        help='base path to monitor for crash dumps',
    )
    cli.add_argument(
        '-d', '--delay',
        type=float,
        default=10.0,
        help='minutes to delay between scans (0 to exit after one)',
    )
    options = cli.parse_args()
    return options


def post_crash(path):
    """Post one crash dump's metadata with `ceph crash post`.

    path: crash dump directory; the contents of its 'meta' file are fed to
          the command on stdin.

    Returns the exit status of the command (0 on success).  A non-zero
    status is logged as a warning together with the command's stderr.
    Raises IOError/OSError if the 'meta' file cannot be read.
    """
    # Read the metadata before spawning anything, so a failed open cannot
    # leave a child process blocked on its stdin.  Read it as bytes: the
    # child's stdin is a binary pipe, and handing communicate() a str raises
    # TypeError on Python 3.
    with open(os.path.join(path, 'meta'), 'rb') as f:
        meta = f.read()

    pr = subprocess.Popen(
        args=['timeout', '30', 'ceph', 'crash', 'post', '-i', '-'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # communicate() waits for the child to exit, so returncode is set after it.
    _, stderr = pr.communicate(input=meta)
    rc = pr.returncode
    if rc != 0:
        # On Python 3 stderr is bytes; decode it so the log does not show a
        # b'...' repr.  On Python 2 it is already a str and is left alone.
        if not isinstance(stderr, str):
            stderr = stderr.decode('utf-8', 'replace')
        log.warning('post %s failed: %s' % (path, stderr))
    return rc


def scrape_path(path):
    """Scan `path` once and post every completed crash dump found in it.

    A subdirectory counts as a crash dump when it contains a 'meta' file, and
    as complete when it also contains a 'done' file.  Each complete dump is
    handed to post_crash(); when that returns 0 the dump directory is moved
    to `path`/posted/.  Dumps whose post fails stay in place and are seen
    again on the next scan.
    """
    for p in os.listdir(path):
        crashpath = os.path.join(path, p)
        metapath = os.path.join(crashpath, 'meta')
        donepath = os.path.join(crashpath, 'done')
        if not os.path.isfile(metapath):
            # not a crash dump directory (e.g. 'posted' itself)
            continue
        if not os.path.isfile(donepath):
            # hang out just for a bit; either we interrupted the dump
            # or the daemon crashed before finishing it
            time.sleep(1)
            if not os.path.isfile(donepath):
                # Skip only this dump.  Returning here would abandon every
                # remaining entry, so a single unfinished dump could block
                # posting of all the others.
                continue
        # ok, we can process this one
        rc = post_crash(crashpath)
        if rc == 0:
            os.rename(crashpath, os.path.join(path, 'posted/', p))
            log.debug(
                "posted %s and renamed %s -> %s " %
                (metapath, p, os.path.join('posted/', p))
            )


def main():
    """Entry point: wait for the 'posted' directory, then scan in a loop.

    Until <path>/posted exists, an error is logged every 30 seconds.  After
    that the crash path is scanned repeatedly, sleeping --delay minutes
    between scans; a delay of 0 exits with status 0 after the first scan.
    """
    opts = parse_args()
    posted_dir = os.path.join(opts.path, 'posted')

    # Do not start scanning until the destination for posted dumps exists.
    while True:
        if os.path.isdir(posted_dir):
            break
        log.error("%s does not exist; please create" % posted_dir)
        time.sleep(30)

    log.info("monitoring path %s, delay %ds" % (opts.path, opts.delay * 60.0))

    one_shot = (opts.delay == 0)
    pause_seconds = opts.delay * 60  # --delay is given in minutes
    while True:
        scrape_path(opts.path)
        if one_shot:
            sys.exit(0)
        time.sleep(pause_seconds)


if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions systemd/50-ceph.preset
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ enable ceph-mgr.target
enable ceph-mon.target
enable ceph-osd.target
enable ceph-radosgw.target
enable ceph-crash.service
1 change: 1 addition & 0 deletions systemd/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ set(CEPH_SYSTEMD_ENV_DIR "/etc/sysconfig"
CACHE PATH "Location for systemd service environmental variable settings files")
set(SYSTEMD_ENV_FILE "${CEPH_SYSTEMD_ENV_DIR}/ceph")
foreach(service
ceph-crash
ceph-fuse@
ceph-mds@
ceph-mgr@
Expand Down
13 changes: 13 additions & 0 deletions systemd/ceph-crash.service.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# systemd unit that runs the ceph-crash script as a long-lived service.
[Unit]
Description=Ceph crash dump collector

[Service]
Type=simple
# No arguments are passed, so the script's own defaults apply
# (path /var/lib/ceph/crash, 10 minutes between scans).
ExecStart=/usr/bin/ceph-crash
# Restart the collector whenever it exits, 10 seconds after it stops.
Restart=always
RestartSec=10
# Start rate limit: 10 starts within a 10 minute window.
StartLimitInterval=10min
StartLimitBurst=10

[Install]
# Enabling the unit attaches it to ceph.target.
WantedBy=ceph.target

0 comments on commit da20184

Please sign in to comment.