Skip to content

Commit

Permalink
Merge pull request ceph#15292 from liewegas/wip-map-cache
Browse files Browse the repository at this point in the history
osd: reduce map cache size

Reviewed-by: Kefu Chai <[email protected]>
Reviewed-by: Josh Durgin <[email protected]>
  • Loading branch information
liewegas authored May 28, 2017
2 parents a9a728f + 855955e commit d8f3ee1
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 13 deletions.
11 changes: 5 additions & 6 deletions src/common/config_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -738,10 +738,10 @@ OPTION(osd_tier_default_cache_hit_set_grade_decay_rate, OPT_INT, 20)
OPTION(osd_tier_default_cache_hit_set_search_last_n, OPT_INT, 1)

OPTION(osd_map_dedup, OPT_BOOL, true)
OPTION(osd_map_max_advance, OPT_INT, 150) // make this < cache_size!
OPTION(osd_map_cache_size, OPT_INT, 200)
OPTION(osd_map_message_max, OPT_INT, 100) // max maps per MOSDMap message
OPTION(osd_map_share_max_epochs, OPT_INT, 100) // cap on # of inc maps we send to peers, clients
OPTION(osd_map_max_advance, OPT_INT, 40) // make this < cache_size!
OPTION(osd_map_cache_size, OPT_INT, 50)
OPTION(osd_map_message_max, OPT_INT, 40) // max maps per MOSDMap message
OPTION(osd_map_share_max_epochs, OPT_INT, 40) // cap on # of inc maps we send to peers, clients
OPTION(osd_inject_bad_map_crc_probability, OPT_FLOAT, 0)
OPTION(osd_inject_failure_on_pg_removal, OPT_BOOL, false)
// shut down the OSD if its status flips more than max_markdown_count times within the most recent max_markdown_period seconds
Expand Down Expand Up @@ -857,7 +857,7 @@ OPTION(osd_default_notify_timeout, OPT_U32, 30) // default notify timeout in sec
OPTION(osd_kill_backfill_at, OPT_INT, 0)

// Bounds how infrequently a new map epoch will be persisted for a pg
OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 150) // make this < map_cache_size!
OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32, 40) // make this < map_cache_size!

OPTION(osd_min_pg_log_entries, OPT_U32, 3000) // number of entries to keep in the pg log when trimming it
OPTION(osd_max_pg_log_entries, OPT_U32, 10000) // max entries, say when degraded, before we trim
Expand Down Expand Up @@ -887,7 +887,6 @@ OPTION(osd_debug_reject_backfill_probability, OPT_DOUBLE, 0)
OPTION(osd_debug_inject_copyfrom_error, OPT_BOOL, false) // inject failure during copyfrom completion
OPTION(osd_debug_misdirected_ops, OPT_BOOL, false)
OPTION(osd_debug_skip_full_check_in_recovery, OPT_BOOL, false)
OPTION(osd_enxio_on_misdirected_op, OPT_BOOL, false)
OPTION(osd_debug_verify_cached_snaps, OPT_BOOL, false)
OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking
OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops
Expand Down
34 changes: 27 additions & 7 deletions src/osd/OSD.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1434,10 +1434,16 @@ void OSDService::send_incremental_map(epoch_t since, Connection *con,
bool OSDService::_get_map_bl(epoch_t e, bufferlist& bl)
{
bool found = map_bl_cache.lookup(e, &bl);
if (found)
if (found) {
if (logger)
logger->inc(l_osd_map_bl_cache_hit);
return true;
}
if (logger)
logger->inc(l_osd_map_bl_cache_miss);
found = store->read(coll_t::meta(),
OSD::get_osdmap_pobject_name(e), 0, 0, bl) >= 0;
OSD::get_osdmap_pobject_name(e), 0, 0, bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED) >= 0;
if (found)
_add_map_bl(e, bl);
return found;
Expand All @@ -1447,10 +1453,16 @@ bool OSDService::get_inc_map_bl(epoch_t e, bufferlist& bl)
{
Mutex::Locker l(map_cache_lock);
bool found = map_bl_inc_cache.lookup(e, &bl);
if (found)
if (found) {
if (logger)
logger->inc(l_osd_map_bl_cache_hit);
return true;
}
if (logger)
logger->inc(l_osd_map_bl_cache_miss);
found = store->read(coll_t::meta(),
OSD::get_inc_osdmap_pobject_name(e), 0, 0, bl) >= 0;
OSD::get_inc_osdmap_pobject_name(e), 0, 0, bl,
CEPH_OSD_OP_FLAG_FADVISE_WILLNEED) >= 0;
if (found)
_add_map_inc_bl(e, bl);
return found;
Expand Down Expand Up @@ -1567,6 +1579,10 @@ void OSDService::reply_op_error(OpRequestRef op, int err, eversion_t v,

void OSDService::handle_misdirected_op(PG *pg, OpRequestRef op)
{
if (!cct->_conf->osd_debug_misdirected_ops) {
return;
}

const MOSDOp *m = static_cast<const MOSDOp*>(op->get_req());
assert(m->get_type() == CEPH_MSG_OSD_OP);

Expand Down Expand Up @@ -1614,9 +1630,6 @@ void OSDService::handle_misdirected_op(PG *pg, OpRequestRef op)
<< " to osd." << whoami
<< " not " << pg->acting
<< " in e" << m->get_map_epoch() << "/" << osdmap->get_epoch();
if (g_conf->osd_enxio_on_misdirected_op) {
reply_op_error(op, -ENXIO);
}
}

void OSDService::enqueue_back(spg_t pgid, PGQueueable qi)
Expand Down Expand Up @@ -2953,6 +2966,7 @@ void OSD::create_logger()
osd_plb.add_u64_counter(
l_osd_waiting_for_map, "messages_delayed_for_map",
"Operations waiting for OSD map");

osd_plb.add_u64_counter(
l_osd_map_cache_hit, "osd_map_cache_hit", "osdmap cache hit");
osd_plb.add_u64_counter(
Expand All @@ -2963,6 +2977,12 @@ void OSD::create_logger()
osd_plb.add_u64_avg(
l_osd_map_cache_miss_low_avg, "osd_map_cache_miss_low_avg",
"osdmap cache miss, avg distance below cache lower bound");
osd_plb.add_u64_counter(
l_osd_map_bl_cache_hit, "osd_map_bl_cache_hit",
"OSDMap buffer cache hits");
osd_plb.add_u64_counter(
l_osd_map_bl_cache_miss, "osd_map_bl_cache_miss",
"OSDMap buffer cache misses");

osd_plb.add_u64(l_osd_stat_bytes, "stat_bytes", "OSD size");
osd_plb.add_u64(l_osd_stat_bytes_used, "stat_bytes_used", "Used space");
Expand Down
2 changes: 2 additions & 0 deletions src/osd/OSD.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ enum {
l_osd_map_cache_miss,
l_osd_map_cache_miss_low,
l_osd_map_cache_miss_low_avg,
l_osd_map_bl_cache_hit,
l_osd_map_bl_cache_miss,

l_osd_stat_bytes,
l_osd_stat_bytes_used,
Expand Down
6 changes: 6 additions & 0 deletions src/test/osd/osd-config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ function TEST_config_track() {
ceph tell osd.0 injectargs "--osd-map-cache-size $cache" || return 1
CEPH_ARGS='' ceph --admin-daemon $dir/ceph-osd.0.asok log flush || return 1
! grep 'is not > osd_map_max_advance' $dir/osd.0.log || return 1
rm $dir/osd.0.log
CEPH_ARGS='' ceph --admin-daemon $dir/ceph-osd.0.asok log reopen || return 1

#
# increase the osd_map_max_advance above the default cache_size
Expand All @@ -95,6 +97,8 @@ function TEST_config_track() {
ceph tell osd.0 injectargs "--osd-map-max-advance $advance" || return 1
CEPH_ARGS='' ceph --admin-daemon $dir/ceph-osd.0.asok log flush || return 1
grep 'is not > osd_map_max_advance' $dir/osd.0.log || return 1
rm $dir/osd.0.log
CEPH_ARGS='' ceph --admin-daemon $dir/ceph-osd.0.asok log reopen || return 1

#
# increase the osd_pg_epoch_persisted_max_stale above the default cache_size
Expand All @@ -104,6 +108,8 @@ function TEST_config_track() {
ceph tell osd.0 injectargs "--osd-pg-epoch-persisted-max-stale $stale" || return 1
CEPH_ARGS='' ceph --admin-daemon $dir/ceph-osd.0.asok log flush || return 1
grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1
rm $dir/osd.0.log
CEPH_ARGS='' ceph --admin-daemon $dir/ceph-osd.0.asok log reopen || return 1
}

main osd-config "$@"
Expand Down

0 comments on commit d8f3ee1

Please sign in to comment.