Skip to content

Commit

Permalink
Merge pull request ceph#57147 from xxhdx1985126/wip-65696
Browse files Browse the repository at this point in the history
crimson/osd/pg_recovery: skip unfound objects when recovering the primary

Reviewed-by: Samuel Just <[email protected]>
  • Loading branch information
athanatos authored Jun 17, 2024
2 parents f0a5405 + 3d03ac2 commit f0751f7
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 13 deletions.
23 changes: 15 additions & 8 deletions src/crimson/osd/osd_operations/background_recovery.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,19 @@ UrgentRecovery::do_recovery()
{
LOG_PREFIX(UrgentRecovery::do_recovery);
DEBUGDPPI("{}: {}", *pg, __func__, *this);
if (!pg->has_reset_since(epoch_started)) {
if (pg->has_reset_since(epoch_started)) {
return seastar::make_ready_future<bool>(false);
}

return pg->find_unfound(epoch_started
).then_interruptible([this] {
return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent,
interruptor>([this] (auto&& trigger) {
return pg->get_recovery_handler()->recover_missing(trigger, soid, need);
}).then_interruptible([] {
return seastar::make_ready_future<bool>(false);
});
}
return seastar::make_ready_future<bool>(false);
});
}

void UrgentRecovery::print(std::ostream &lhs) const
Expand Down Expand Up @@ -164,11 +168,14 @@ PglogBasedRecovery::do_recovery()
if (pg->has_reset_since(epoch_started)) {
return seastar::make_ready_future<bool>(false);
}
return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent,
interruptor>([this] (auto&& trigger) {
return pg->get_recovery_handler()->start_recovery_ops(
trigger,
crimson::common::local_conf()->osd_recovery_max_single_start);
return pg->find_unfound(epoch_started
).then_interruptible([this] {
return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent,
interruptor>([this] (auto&& trigger) {
return pg->get_recovery_handler()->start_recovery_ops(
trigger,
crimson::common::local_conf()->osd_recovery_max_single_start);
});
});
}

Expand Down
34 changes: 34 additions & 0 deletions src/crimson/osd/pg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,40 @@ void PG::queue_check_readable(epoch_t last_peering_reset, ceph::timespan delay)
std::chrono::duration_cast<seastar::lowres_clock::duration>(delay));
}

/// Deal with unfound objects before recovery work is started on this PG.
///
/// If have_unfound() is false this resolves immediately. Otherwise
/// discover_all_missing() is run against a fresh PeeringCtx; when its result
/// tests false and the PG is in PG_STATE_BACKFILLING (checked first) or
/// PG_STATE_RECOVERING, a local UnfoundBackfill / UnfoundRecovery peering
/// event is started and its result discarded. Whatever the context
/// accumulated is then handed to dispatch_context().
///
/// @param epoch_started  passed as both epoch arguments of any
///                       LocalPeeringEvent started here.
/// @return a ready future when nothing is unfound, otherwise the future
///         returned by dispatch_context().
PG::interruptible_future<> PG::find_unfound(epoch_t epoch_started)
{
  const bool anything_unfound = have_unfound();
  if (!anything_unfound) {
    return interruptor::now();
  }

  // Starts a LocalPeeringEvent carrying `evt` for this PG; the returned
  // handle is intentionally dropped.
  auto queue_local_event = [this, epoch_started](auto&& evt) {
    std::ignore = get_shard_services().start_operation<LocalPeeringEvent>(
      this,
      get_pg_whoami(),
      get_pgid(),
      epoch_started,
      epoch_started,
      std::forward<decltype(evt)>(evt));
  };

  PeeringCtx rctx;
  const bool gave_up = !peering_state.discover_all_missing(rctx);
  if (gave_up) {
    // The debug lines stay at function scope so __func__ names this method.
    if (peering_state.state_test(PG_STATE_BACKFILLING)) {
      logger().debug(
        "{} {} no luck, giving up on this pg for now (in backfill)",
        *this, __func__);
      queue_local_event(PeeringState::UnfoundBackfill());
    } else if (peering_state.state_test(PG_STATE_RECOVERING)) {
      logger().debug(
        "{} {} no luck, giving up on this pg for now (in recovery)",
        *this, __func__);
      queue_local_event(PeeringState::UnfoundRecovery());
    }
  }

  // Runs on both outcomes of discover_all_missing().
  return get_shard_services().dispatch_context(get_collection_ref(), std::move(rctx));
}

void PG::recheck_readable()
{
bool changed = false;
Expand Down
7 changes: 4 additions & 3 deletions src/crimson/osd/pg.h
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,10 @@ class PG : public boost::intrusive_ref_counter<
// TODO: see PrimaryLogPG::mark_all_unfound_lost()
return seastar::now();
}
// Defined out of line in pg.cc. Resolves immediately when have_unfound() is
// false; otherwise runs discover_all_missing() and dispatches the resulting
// peering context. `epoch_started` is used for both epoch arguments of any
// LocalPeeringEvent the definition starts.
interruptible_future<> find_unfound(epoch_t epoch_started);
// Forwards to peering_state.have_unfound() and returns its result unchanged.
bool have_unfound() const {
return peering_state.have_unfound();
}

bool old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch) const;

Expand Down Expand Up @@ -771,9 +775,6 @@ class PG : public boost::intrusive_ref_counter<
friend class SnapTrimEvent;
friend class SnapTrimObjSubEvent;
private:
// Stub: does no work and immediately yields a ready future holding `true`.
// NOTE(review): the enclosing hunk header shrinks from 9 to 6 lines, so these
// three lines appear to be the ones this commit deletes in favour of
// find_unfound(epoch_t) — confirm against the final pg.h.
seastar::future<bool> find_unfound() {
return seastar::make_ready_future<bool>(true);
}

bool can_discard_replica_op(const Message& m, epoch_t m_map_epoch) const;
bool can_discard_op(const MOSDOp& m) const;
Expand Down
16 changes: 14 additions & 2 deletions src/crimson/osd/pg_recovery.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,23 @@ size_t PGRecovery::start_primary_recovery_ops(
} else {
soid = p->second;
}
const pg_missing_item& item = missing.get_items().find(p->second)->second;
++p;

hobject_t head = soid.get_head();

if (pg->get_peering_state().get_missing_loc().is_unfound(soid)) {
logger().debug("{}: object {} unfound", __func__, soid);
++skipped;
continue;
}
if (pg->get_peering_state().get_missing_loc().is_unfound(head)) {
logger().debug("{}: head object {} unfound", __func__, soid);
++skipped;
continue;
}

const pg_missing_item& item = missing.get_items().find(p->second)->second;
++p;

bool head_missing = missing.is_missing(head);
logger().info(
"{} {} item.need {} {} {} {} {}",
Expand Down

0 comments on commit f0751f7

Please sign in to comment.