diff --git a/ceph.spec.in b/ceph.spec.in index 278aa4f724a37..c732d8cc31508 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -1041,13 +1041,14 @@ mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mon mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/osd mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mds mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mgr +mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/crash +mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/crash/posted mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/radosgw mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-osd mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-mds mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rgw mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-mgr mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rbd -mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/crash %if 0%{?suse_version} # create __pycache__ directories and their contents @@ -1063,6 +1064,7 @@ rm -rf %{buildroot} %files %files base +%{_bindir}/ceph-crash %{_bindir}/crushtool %{_bindir}/monmaptool %{_bindir}/osdmaptool @@ -1079,6 +1081,7 @@ rm -rf %{buildroot} %{_libdir}/ceph/erasure-code/libec_*.so* %dir %{_libdir}/ceph/compressor %{_libdir}/ceph/compressor/libceph_*.so* +%{_unitdir}/ceph-crash.service %ifarch x86_64 %dir %{_libdir}/ceph/crypto %{_libdir}/ceph/crypto/libceph_*.so* @@ -1114,6 +1117,8 @@ rm -rf %{buildroot} %{_mandir}/man8/monmaptool.8* %{_mandir}/man8/ceph-kvstore-tool.8* #set up placeholder directories +%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/crash +%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/crash/posted %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds @@ -1126,22 +1131,22 @@ rm -rf %{buildroot} %if 0%{?suse_version} %fillup_only if [ $1 -eq 1 ] ; then -/usr/bin/systemctl preset ceph.target >/dev/null 2>&1 
|| : +/usr/bin/systemctl preset ceph.target ceph-crash.service >/dev/null 2>&1 || : fi %endif %if 0%{?fedora} || 0%{?rhel} -%systemd_post ceph.target +%systemd_post ceph.target ceph-crash.service %endif if [ $1 -eq 1 ] ; then -/usr/bin/systemctl start ceph.target >/dev/null 2>&1 || : +/usr/bin/systemctl start ceph.target ceph-crash.service >/dev/null 2>&1 || : fi %preun base %if 0%{?suse_version} -%service_del_preun ceph.target +%service_del_preun ceph.target ceph-crash.service %endif %if 0%{?fedora} || 0%{?rhel} -%systemd_preun ceph.target +%systemd_preun ceph.target ceph-crash.service %endif %postun base diff --git a/debian/ceph-base.dirs b/debian/ceph-base.dirs index 262e6f6a508e2..6f580230a65f4 100644 --- a/debian/ceph-base.dirs +++ b/debian/ceph-base.dirs @@ -5,3 +5,4 @@ var/lib/ceph/bootstrap-rgw var/lib/ceph/bootstrap-rbd var/lib/ceph/tmp var/lib/ceph/crash +var/lib/ceph/crash/posted diff --git a/debian/ceph-base.install b/debian/ceph-base.install index 5f366ca172be1..24731dc89db10 100644 --- a/debian/ceph-base.install +++ b/debian/ceph-base.install @@ -1,4 +1,6 @@ etc/init.d/ceph +lib/systemd/system/ceph-crash.service +usr/bin/ceph-crash usr/bin/ceph-debugpack usr/bin/ceph-run usr/bin/crushtool diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f7ba403422d67..556e9c775d75f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -581,6 +581,9 @@ configure_file(${CMAKE_SOURCE_DIR}/src/init-ceph.in configure_file(ceph-post-file.in ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-post-file @ONLY) +configure_file(ceph-crash.in + ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-crash @ONLY) + if(WITH_TESTS) install(PROGRAMS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-debugpack @@ -591,6 +594,7 @@ endif() install(PROGRAMS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-post-file + ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-crash ${CMAKE_SOURCE_DIR}/src/ceph-run ${CMAKE_SOURCE_DIR}/src/ceph-clsinfo DESTINATION bin) diff --git a/src/ceph-crash.in 
b/src/ceph-crash.in
new file mode 100755
index 0000000000000..b43cd782c3fe3
--- /dev/null
+++ b/src/ceph-crash.in
@@ -0,0 +1,96 @@
+#!@PYTHON_EXECUTABLE@
+# -*- mode:python -*-
+# vim: ts=4 sw=4 smarttab expandtab
+
+import argparse
+import logging
+import os
+import subprocess
+import sys
+import time
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+
+def parse_args():
+    """Parse the command line: base crash path and scan delay (minutes)."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-p', '--path', default='/var/lib/ceph/crash',
+        help='base path to monitor for crash dumps')
+    parser.add_argument(
+        '-d', '--delay', default=10.0, type=float,
+        help='minutes to delay between scans (0 to exit after one)',
+    )
+    return parser.parse_args()
+
+
+def post_crash(path):
+    """Feed <path>/meta to 'ceph crash post' and return its exit status.
+
+    The meta file is read as bytes because the child's stdin is a binary
+    pipe; passing text to communicate() fails under Python 3.
+    """
+    with open(os.path.join(path, 'meta'), 'rb') as f:
+        meta = f.read()
+    pr = subprocess.Popen(
+        args=['timeout', '30', 'ceph', 'crash', 'post', '-i', '-'],
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    # communicate() waits for the child, so returncode is set afterwards
+    _, stderr = pr.communicate(input=meta)
+    rc = pr.returncode
+    if rc != 0:
+        log.warning('post %s failed: %s',
+                    path, stderr.decode('utf-8', 'replace'))
+    return rc
+
+
+def scrape_path(path):
+    """Post every finished crash dump under path and move it to posted/."""
+    for p in os.listdir(path):
+        crashpath = os.path.join(path, p)
+        metapath = os.path.join(crashpath, 'meta')
+        donepath = os.path.join(crashpath, 'done')
+        if not os.path.isfile(metapath):
+            continue
+        if not os.path.isfile(donepath):
+            # hang out just for a bit; either we interrupted the dump
+            # or the daemon crashed before finishing it
+            time.sleep(1)
+            if not os.path.isfile(donepath):
+                # still unfinished: skip this one only, so that it cannot
+                # hold back the remaining dumps; it is retried next scan
+                continue
+        # ok, we can process this one
+        rc = post_crash(crashpath)
+        if rc == 0:
+            os.rename(crashpath, os.path.join(path, 'posted/', p))
+            log.debug(
+                "posted %s and renamed %s -> %s " %
+                (metapath, p, os.path.join('posted/', p))
+            )
+
+
+def main():
+    """Wait for the posted/ directory, then scan the crash path in a loop."""
+    args = parse_args()
+    postdir = os.path.join(args.path, 'posted')
+
+    while not os.path.isdir(postdir):
+        log.error("%s does not exist; please create" % postdir)
+        time.sleep(30)
+
+    log.info("monitoring path %s, delay %ds" % (args.path, args.delay * 60.0))
+    while True:
+        scrape_path(args.path)
+        if args.delay == 0:
+            sys.exit(0)
+        time.sleep(args.delay * 60)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/systemd/50-ceph.preset b/systemd/50-ceph.preset
index 34c0801f9f2ba..da3ee0b7b0bcf 100644
--- a/systemd/50-ceph.preset
+++ b/systemd/50-ceph.preset
@@ -4,3 +4,4 @@ enable ceph-mgr.target
 enable ceph-mon.target
 enable ceph-osd.target
 enable ceph-radosgw.target
+enable ceph-crash.service
diff --git a/systemd/CMakeLists.txt b/systemd/CMakeLists.txt
index 67497f9a89122..56be619d44cab 100644
--- a/systemd/CMakeLists.txt
+++ b/systemd/CMakeLists.txt
@@ -4,6 +4,7 @@ set(CEPH_SYSTEMD_ENV_DIR "/etc/sysconfig"
   CACHE PATH "Location for systemd service environmental variable settings files")
 set(SYSTEMD_ENV_FILE "${CEPH_SYSTEMD_ENV_DIR}/ceph")
 foreach(service
+  ceph-crash
   ceph-fuse@
   ceph-mds@
   ceph-mgr@
diff --git a/systemd/ceph-crash.service.in b/systemd/ceph-crash.service.in
new file mode 100644
index 0000000000000..8304dd61220bb
--- /dev/null
+++ b/systemd/ceph-crash.service.in
@@ -0,0 +1,13 @@
+[Unit]
+Description=Ceph crash dump collector
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/ceph-crash
+Restart=always
+RestartSec=10
+StartLimitInterval=10min
+StartLimitBurst=10
+
+[Install]
+WantedBy=ceph.target