Skip to content

Commit

Permalink
Merge pull request ceph#14608 from tchaikov/wip-19594
Browse files Browse the repository at this point in the history
qa/tasks: assert on pg status with a timeout

Reviewed-by: Sage Weil <[email protected]>
  • Loading branch information
tchaikov authored Apr 20, 2017
2 parents 535b2cf + cc61bb8 commit ee653ba
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 28 deletions.
41 changes: 23 additions & 18 deletions qa/tasks/ceph_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1317,6 +1317,23 @@ def get_pg_replica(self, pool, pgnum):
return int(pg['acting'][-1])
assert False

def wait_for_pg_stats(func):
    """
    Decorator that retries ``func`` while it raises AssertionError.

    The wrapped callable is invoked as ``func(self, *args, **kwargs)``.
    After each failed attempt the wrapper sleeps for the next entry of
    ``delays``; once every delay has been used, one last attempt is made
    and its AssertionError (if any) is propagated to the caller.

    :param func: callable that asserts on pg stats and may return a value
    :returns: wrapper returning whatever the first successful call returns
    :raises AssertionError: if ``func`` still fails after the final delay
    """
    # both osd_mon_report_interval_min and mgr_stats_period are 5 seconds
    # by default, so, taking the faulty injection in ms into consideration,
    # fresh stats are expected within about 12 seconds. The back-off
    # schedule below waits up to 33 seconds in total before giving up.
    delays = [1, 1, 2, 3, 5, 8, 13]

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        for delay in delays:
            try:
                return func(self, *args, **kwargs)
            except AssertionError:
                time.sleep(delay)
        # Check once more after the last sleep: without this the final
        # delay would only postpone the failure. Any AssertionError raised
        # here propagates unchanged, with its original traceback.
        return func(self, *args, **kwargs)
    return wrapper

def get_pg_primary(self, pool, pgnum):
"""
get primary for pool, pgnum (e.g. (data, 0)->0
Expand Down Expand Up @@ -1710,29 +1727,17 @@ def compile_pg_status(self):
ret[status] += 1
return ret

def pg_scrubbing(self, pool, pgnum):
"""
pg scrubbing wrapper
"""
@wait_for_pg_stats
def with_pg_state(self, pool, pgnum, check):
pgstr = self.get_pgid(pool, pgnum)
stats = self.get_single_pg_stats(pgstr)
return 'scrub' in stats['state']
assert(check(stats['state']))

def pg_repairing(self, pool, pgnum):
"""
pg repairing wrapper
"""
pgstr = self.get_pgid(pool, pgnum)
stats = self.get_single_pg_stats(pgstr)
return 'repair' in stats['state']

def pg_inconsistent(self, pool, pgnum):
"""
pg inconsistent wrapper
"""
@wait_for_pg_stats
def with_pg(self, pool, pgnum, check):
pgstr = self.get_pgid(pool, pgnum)
stats = self.get_single_pg_stats(pgstr)
return 'inconsistent' in stats['state']
return check(stats)

def get_last_scrub_stamp(self, pool, pgnum):
"""
Expand Down
18 changes: 9 additions & 9 deletions qa/tasks/repair_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def repair_test_1(manager, corrupter, chooser, scrub_type):
log.info("scrubbing")
manager.do_pg_scrub(pool, 0, scrub_type)

assert manager.pg_inconsistent(pool, 0)
manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s)

# repair
log.info("repairing")
Expand All @@ -104,7 +104,7 @@ def repair_test_1(manager, corrupter, chooser, scrub_type):
manager.do_pg_scrub(pool, 0, scrub_type)

# verify consistent
assert not manager.pg_inconsistent(pool, 0)
manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s)
log.info("done")


Expand Down Expand Up @@ -147,13 +147,13 @@ def repair_test_2(ctx, manager, config, chooser):
log.info("scrubbing")
manager.do_pg_scrub(pool, 0, 'deep-scrub')

assert manager.pg_inconsistent(pool, 0)
manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s)

# Regression test for bug #4778, should still
# be inconsistent after scrub
manager.do_pg_scrub(pool, 0, 'scrub')

assert manager.pg_inconsistent(pool, 0)
manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s)

# Additional corruptions including 2 types for file1
log.info("corrupting more objects")
Expand All @@ -166,7 +166,7 @@ def repair_test_2(ctx, manager, config, chooser):
log.info("scrubbing")
manager.do_pg_scrub(pool, 0, 'deep-scrub')

assert manager.pg_inconsistent(pool, 0)
manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s)

# repair
log.info("repairing")
Expand All @@ -176,15 +176,15 @@ def repair_test_2(ctx, manager, config, chooser):
time.sleep(10)

# verify consistent
assert not manager.pg_inconsistent(pool, 0)
manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s)

# In the future repair might determine state of
# inconsistency itself, verify with a deep-scrub
log.info("scrubbing")
manager.do_pg_scrub(pool, 0, 'deep-scrub')

# verify consistent
assert not manager.pg_inconsistent(pool, 0)
manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s)

log.info("done")

Expand Down Expand Up @@ -230,7 +230,7 @@ def repair_test_erasure_code(manager, corrupter, victim, scrub_type):
log.info("scrubbing")
manager.do_pg_scrub(pool, 0, scrub_type)

assert manager.pg_inconsistent(pool, 0)
manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s)

# repair
log.info("repairing")
Expand All @@ -240,7 +240,7 @@ def repair_test_erasure_code(manager, corrupter, victim, scrub_type):
manager.do_pg_scrub(pool, 0, scrub_type)

# verify consistent
assert not manager.pg_inconsistent(pool, 0)
manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s)
log.info("done")


Expand Down
5 changes: 4 additions & 1 deletion src/mon/OSDMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -868,9 +868,11 @@ OSDMonitor::update_pending_creatings(const OSDMap::Incremental& inc)
}
scan_for_creating_pgs(osdmap.get_pools(),
inc.old_pools,
inc.modified,
&pending_creatings);
scan_for_creating_pgs(inc.new_pools,
inc.old_pools,
inc.modified,
&pending_creatings);
pending_creatings.last_scan_epoch = osdmap.get_epoch();
return pending_creatings;
Expand Down Expand Up @@ -3041,6 +3043,7 @@ void OSDMonitor::check_pg_creates_sub(Subscription *sub)

void OSDMonitor::scan_for_creating_pgs(const map<int64_t,pg_pool_t>& pools,
const set<int64_t>& removed_pools,
utime_t modified,
creating_pgs_t* creating_pgs) const
{
for (auto& p : pools) {
Expand Down Expand Up @@ -3076,7 +3079,7 @@ void OSDMonitor::scan_for_creating_pgs(const map<int64_t,pg_pool_t>& pools,
dout(20) << __func__ << " already have " << pgid << dendl;
continue;
}
creating_pgs->pgs.emplace(pgid, make_pair(created, ceph_clock_now()));
creating_pgs->pgs.emplace(pgid, make_pair(created, modified));
dout(10) << __func__ << " adding " << pgid
<< " at " << osdmap.get_epoch() << dendl;
}
Expand Down
1 change: 1 addition & 0 deletions src/mon/OSDMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ class OSDMonitor : public PaxosService {
void trim_creating_pgs(creating_pgs_t *creating_pgs, const PGMap& pgm);
void scan_for_creating_pgs(const std::map<int64_t,pg_pool_t>& pools,
const std::set<int64_t>& removed_pools,
utime_t modified,
creating_pgs_t* creating_pgs) const;
pair<int32_t, pg_t> get_parent_pg(pg_t pgid) const;
void update_creating_pgs();
Expand Down

0 comments on commit ee653ba

Please sign in to comment.