Skip to content

Commit

Permalink
tools/crushdiff: new tool to test crushmap change
Browse files Browse the repository at this point in the history
A tool to test the effect (number of pgs, objects, bytes moved)
of a crushmap change. This is a wrapper around osdmaptool, hardly
relying on its --test-map-pgs-dump option to get the list of
changed pgs.  Additionally it uses pg stats to calculate the
numbers of objects and bytes moved.

Signed-off-by: Mykola Golub <[email protected]>
  • Loading branch information
trociny committed Aug 24, 2021
1 parent 8aa1400 commit 6c73184
Show file tree
Hide file tree
Showing 4 changed files with 339 additions and 0 deletions.
1 change: 1 addition & 0 deletions ceph.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -1577,6 +1577,7 @@ exit 0
%{_bindir}/cephfs-data-scan
%{_bindir}/cephfs-journal-tool
%{_bindir}/cephfs-table-tool
%{_bindir}/crushdiff
%{_bindir}/rados
%{_bindir}/radosgw-admin
%{_bindir}/rbd
Expand Down
1 change: 1 addition & 0 deletions debian/ceph-common.install
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ usr/bin/ceph-syn
usr/bin/cephfs-data-scan
usr/bin/cephfs-journal-tool
usr/bin/cephfs-table-tool
usr/bin/crushdiff
usr/bin/rados
usr/bin/radosgw-admin
usr/bin/rbd
Expand Down
2 changes: 2 additions & 0 deletions src/tools/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ add_executable(osdmaptool ${osdomaptool_srcs})
target_link_libraries(osdmaptool global)
install(TARGETS osdmaptool DESTINATION bin)

install(PROGRAMS crushdiff DESTINATION bin)

set(ceph-diff-sorted_srcs ceph-diff-sorted.cc)
add_executable(ceph-diff-sorted ${ceph-diff-sorted_srcs})
set_target_properties(ceph-diff-sorted PROPERTIES
Expand Down
335 changes: 335 additions & 0 deletions src/tools/crushdiff
Original file line number Diff line number Diff line change
@@ -0,0 +1,335 @@
#!/usr/bin/python3
#
# A tool to test the effect (number of pgs, objects, bytes moved) of a
# crushmap change. This is a wrapper around osdmaptool, hardly relying
# on its --test-map-pgs-dump option to get the list of changed pgs.
# Additionally it uses pg stats to calculate the numbers of objects
# and bytes moved.
#
# Typical usage:
#
# # Get current crushmap
# $ crushdiff export cm.txt
# # Edit the map
# $ $EDITOR cm.txt
# # Check the result
# $ crushdiff compare cm.txt
# # Install the updated map
# $ crushdiff import cm.txt
#
# By default, crushdiff will use the cluster current osdmap and pg
# stats, which requires access to the cluster. But one can use the
# --osdmap and --pg-dump options to test against previously obtained
# data.
#

import argparse
import re
import json
import os
import sys
import tempfile

#
# Global
#

parser = argparse.ArgumentParser(prog='crushdiff',
description='Tool for updating crush map')
parser.add_argument(
'command',
metavar='compare|export|import',
help='command',
default=None,
)
parser.add_argument(
'-c', '--compiled',
action='store_true',
help='use compiled crush map',
default=False,
)
parser.add_argument(
'crushmap',
metavar='crushmap',
help='crushmap json file',
default=None,
)
parser.add_argument(
'-m', '--osdmap',
metavar='osdmap',
help='',
default=None,
)
parser.add_argument(
'-p', '--pg-dump',
metavar='pg-dump',
help='`ceph pg dump` json output',
default=None,
)
parser.add_argument(
'-v', '--verbose',
action='store_true',
help='be verbose',
default=False,
)

#
# Functions
#

def get_human_readable(bytes, precision=2):
suffixes = ['', 'Ki', 'Mi', 'Gi', 'Ti']
suffix_index = 0
while bytes > 1024 and suffix_index < 4:
# increment the index of the suffix
suffix_index += 1
# apply the division
bytes = bytes / 1024.0
return '%.*f%s' % (precision, bytes, suffixes[suffix_index])

def run_cmd(cmd, verbose=False):
if verbose:
print(cmd, file=sys.stderr, flush=True)
os.system(cmd)

def get_osdmap(file):
with open(file, "r") as f:
return json.load(f)

def get_pools(osdmap):
return {p['pool']: p for p in osdmap['pools']}

def get_erasure_code_profiles(osdmap):
return osdmap['erasure_code_profiles']

def get_pgmap(pg_dump_file):
with open(pg_dump_file, "r") as f:
return json.load(f)['pg_map']

def get_pg_stats(pgmap):
return {pg['pgid']: pg for pg in pgmap['pg_stats']}

def parse_test_map_pgs_dump(file):
# Format:
# pool 1 pg_num 16
# 1.0 [1,0,2] 1
# 1.1 [2,0,1] 2
# ...
# pool 2 pg_num 32
# 2.0 [2,1,0] 2
# 2.1 [2,1,0] 2
# ...
# #osd count first primary c wt wt
# osd.1 208 123 123 0.098587 1

pgs = {}

with open(file, "r") as f:
pool = None
for l in f.readlines():
m = re.match('^pool (\d+) pg_num (\d+)', l)
if m:
pool = m.group(1)
continue
if not pool:
continue
m = re.match('^#osd', l)
if m:
break
m = re.match('^(\d+\.[0-9a-f]+)\s+\[([\d,]+)\]', l)
if not m:
continue
pgid = m.group(1)
osds = [int(x) for x in m.group(2).split(',')]
pgs[pgid] = osds

return pgs

def do_compare(new_crushmap_in, osdmap=None, pg_dump=None, compiled=False,
verbose=False):
with tempfile.TemporaryDirectory() as tmpdirname:
if compiled:
new_crushmap_file = new_crushmap_in
else:
new_crushmap_file = os.path.join(tmpdirname, 'crushmap')
run_cmd('crushtool -c {} -o {}'.format(new_crushmap_in,
new_crushmap_file), verbose)

osdmap_file = os.path.join(tmpdirname, 'osdmap')
if osdmap:
run_cmd('cp {} {}'.format(osdmap, osdmap_file), verbose)
else:
run_cmd('ceph osd getmap -o {}'.format(osdmap_file), verbose)

if not pg_dump:
pg_dump = os.path.join(tmpdirname, 'pg_dump.json')
run_cmd('ceph pg dump --format json > {}'.format(pg_dump), verbose)

old_test_map_pgs_dump = os.path.join(tmpdirname, 'pgs.old.txt')
run_cmd('osdmaptool {} --test-map-pgs-dump > {}'.format(
osdmap_file, old_test_map_pgs_dump), verbose)
if verbose:
run_cmd('cat {} >&2'.format(old_test_map_pgs_dump), True)

new_test_map_pgs_dump = os.path.join(tmpdirname, 'pgs.new.txt')
run_cmd(
'osdmaptool {} --import-crush {} --test-map-pgs-dump > {}'.format(
osdmap_file, new_crushmap_file, new_test_map_pgs_dump), verbose)
if verbose:
run_cmd('cat {} >&2'.format(new_test_map_pgs_dump), True)

osdmap_file_json = os.path.join(tmpdirname, 'osdmap.json')
run_cmd('osdmaptool {} --dump json > {}'.format(
osdmap_file, osdmap_file_json), verbose)
osdmap = get_osdmap(osdmap_file_json)
pools = get_pools(osdmap)
ec_profiles = get_erasure_code_profiles(osdmap)

pgmap = get_pgmap(pg_dump)
pg_stats = get_pg_stats(pgmap)

old_pgs = parse_test_map_pgs_dump(old_test_map_pgs_dump)
new_pgs = parse_test_map_pgs_dump(new_test_map_pgs_dump)

diff_pg_count = 0
total_object_count = 0
diff_object_count = 0
for pgid in old_pgs:
objects = pg_stats[pgid]['stat_sum']['num_objects']
total_object_count += objects

if old_pgs[pgid] == new_pgs[pgid]:
continue

pool_id = int(pgid.split('.')[0])

if len(new_pgs[pgid]) < pools[pool_id]['size']:
print("WARNING: {} will be undersized ({})".format(
pgid, new_pgs[pgid]), file=sys.stderr, flush=True)

if not pools[pool_id]['erasure_code_profile'] and \
sorted(old_pgs[pgid]) == sorted(new_pgs[pgid]):
continue

if verbose:
print("{}\t{} -> {}".format(pgid, old_pgs[pgid], new_pgs[pgid]),
file=sys.stderr, flush=True)
diff_pg_count += 1
diff_object_count += objects

print("{}/{} ({:.2f}%) pgs affected".format(
diff_pg_count, len(old_pgs),
100 * diff_pg_count / len(old_pgs) if len(old_pgs) else 0),
flush=True)
print("{}/{} ({:.2f}%) objects affected".format(
diff_object_count, total_object_count,
100 * diff_object_count / total_object_count \
if total_object_count else 0), flush=True)

total_pg_shard_count = 0
diff_pg_shard_count = 0
total_object_shard_count = 0
diff_object_shard_count = 0
total_bytes = 0
diff_bytes = 0
for pgid in old_pgs:
pool_id = int(pgid.split('.')[0])
ec_profile = pools[pool_id]['erasure_code_profile']
if ec_profile:
k = int(ec_profiles[ec_profile]['k'])
m = int(ec_profiles[ec_profile]['m'])
else:
k = 1
m = pools[pool_id]['size'] - 1

bytes = pg_stats[pgid]['stat_sum']['num_bytes'] + \
pg_stats[pgid]['stat_sum']['num_omap_bytes']
objects = pg_stats[pgid]['stat_sum']['num_objects']

total_pg_shard_count += len(old_pgs[pgid])
total_object_shard_count += objects * (k + m)
total_bytes += bytes * (k + m) / k

if old_pgs[pgid] == new_pgs[pgid]:
continue

old_count = diff_pg_shard_count

if ec_profile:
for i in range(len(old_pgs[pgid])):
if old_pgs[pgid][i] != new_pgs[pgid][i]:
diff_pg_shard_count += 1
diff_object_shard_count += objects
diff_bytes += bytes / k
else:
for osd in old_pgs[pgid]:
if osd not in new_pgs[pgid]:
diff_pg_shard_count += 1
diff_object_shard_count += objects
diff_bytes += bytes / k

if old_count == diff_pg_shard_count:
continue

if verbose:
print("{}\t{} -> {}".format(pgid, old_pgs[pgid], new_pgs[pgid]),
file=sys.stderr, flush=True)

print("{}/{} ({:.2f}%) pg shards to move".format(
diff_pg_shard_count, total_pg_shard_count,
100 * diff_pg_shard_count / total_pg_shard_count \
if total_pg_shard_count else 0), flush=True)
print("{}/{} ({:.2f}%) pg object shards to move".format(
diff_object_shard_count, total_object_shard_count,
100 * diff_object_shard_count / total_object_shard_count \
if total_object_shard_count else 0), flush=True)
print("{}/{} ({:.2f}%) bytes to move".format(
get_human_readable(int(diff_bytes)),
get_human_readable(int(total_bytes)),
100 * diff_bytes / total_bytes if total_bytes else 0),
flush=True)

def do_export(crushmap_out, osdmap_file=None, compiled=False, verbose=False):
with tempfile.TemporaryDirectory() as tmpdirname:
if not osdmap_file:
osdmap_file = os.path.join(tmpdirname, 'osdmap')
run_cmd('ceph osd getmap -o {}'.format(osdmap_file), verbose)

crushmap_file = crushmap_out if compiled else \
os.path.join(tmpdirname, 'crushmap')
run_cmd('osdmaptool {} --export-crush {}'.format(
osdmap_file, crushmap_file), verbose)
if not compiled:
run_cmd('crushtool -d {} -o {}'.format(crushmap_file, crushmap_out),
verbose)

def do_import(crushmap_in, osdmap=None, compiled=False, verbose=False):
with tempfile.TemporaryDirectory() as tmpdirname:
if compiled:
crushmap_file = crushmap_in
else:
crushmap_file = os.path.join(tmpdirname, 'crushmap')
run_cmd('crushtool -c {} -o {}'.format(crushmap_in,
crushmap_file), verbose)
if osdmap:
run_cmd('osdmaptool {} --import-crush {}'.format(
osdmap, crushmap_file), verbose)
else:
run_cmd('ceph osd setcrushmap -i {}'.format(crushmap_file), verbose)

def main():
args = parser.parse_args()

if args.command == 'compare':
do_compare(args.crushmap, args.osdmap, args.pg_dump, args.compiled,
args.verbose)
elif args.command == 'export':
do_export(args.crushmap, args.osdmap, args.compiled, args.verbose)
elif args.command == 'import':
do_import(args.crushmap, args.osdmap, args.compiled, args.verbose)

#
# main
#

main()

0 comments on commit 6c73184

Please sign in to comment.