Skip to content

Commit

Permalink
md/raid5: fix 'out of memory' during raid cache recovery
Browse files Browse the repository at this point in the history
This fixes the case when md array assembly fails because of raid cache recovery
unable to allocate a stripe, despite attempts to replay stripes and increase
cache size. This happens because stripes released by r5c_recovery_replay_stripes
and raid5_set_cache_size don't become available for allocation immediately.
Released stripes first are placed on conf->released_stripes list and require
md thread to merge them on conf->inactive_list before they can be allocated.

Patch allows final allocation attempt during cache recovery to wait for
new stripes to become availabe for allocation.

Cc: [email protected]
Cc: Shaohua Li <[email protected]>
Cc: linux-stable <[email protected]> # 4.10+
Fixes: b4c625c ("md/r5cache: r5cache recovery: part 1")
Signed-off-by: Alexei Naberezhnov <[email protected]>
Signed-off-by: Song Liu <[email protected]>
  • Loading branch information
Alexei Naberezhnov authored and liu-song-6 committed Jan 28, 2019
1 parent f17b5f0 commit 483cbbe
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 13 deletions.
33 changes: 22 additions & 11 deletions drivers/md/raid5-cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -1935,12 +1935,14 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
}

static struct stripe_head *
r5c_recovery_alloc_stripe(struct r5conf *conf,
sector_t stripe_sect)
r5c_recovery_alloc_stripe(
struct r5conf *conf,
sector_t stripe_sect,
int noblock)
{
struct stripe_head *sh;

sh = raid5_get_active_stripe(conf, stripe_sect, 0, 1, 0);
sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
if (!sh)
return NULL; /* no more stripe available */

Expand Down Expand Up @@ -2150,7 +2152,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
stripe_sect);

if (!sh) {
sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
sh = r5c_recovery_alloc_stripe(conf, stripe_sect, 1);
/*
* cannot get stripe from raid5_get_active_stripe
* try replay some stripes
Expand All @@ -2159,20 +2161,29 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
r5c_recovery_replay_stripes(
cached_stripe_list, ctx);
sh = r5c_recovery_alloc_stripe(
conf, stripe_sect);
conf, stripe_sect, 1);
}
if (!sh) {
int new_size = conf->min_nr_stripes * 2;
pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
mdname(mddev),
conf->min_nr_stripes * 2);
raid5_set_cache_size(mddev,
conf->min_nr_stripes * 2);
sh = r5c_recovery_alloc_stripe(conf,
stripe_sect);
new_size);
ret = raid5_set_cache_size(mddev, new_size);
if (conf->min_nr_stripes <= new_size / 2) {
pr_err("md/raid:%s: Cannot increase cache size, ret=%d, new_size=%d, min_nr_stripes=%d, max_nr_stripes=%d\n",
mdname(mddev),
ret,
new_size,
conf->min_nr_stripes,
conf->max_nr_stripes);
return -ENOMEM;
}
sh = r5c_recovery_alloc_stripe(
conf, stripe_sect, 0);
}
if (!sh) {
pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
mdname(mddev));
mdname(mddev));
return -ENOMEM;
}
list_add_tail(&sh->lru, cached_stripe_list);
Expand Down
8 changes: 6 additions & 2 deletions drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -6369,6 +6369,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
int
raid5_set_cache_size(struct mddev *mddev, int size)
{
int result = 0;
struct r5conf *conf = mddev->private;

if (size <= 16 || size > 32768)
Expand All @@ -6385,11 +6386,14 @@ raid5_set_cache_size(struct mddev *mddev, int size)

mutex_lock(&conf->cache_size_mutex);
while (size > conf->max_nr_stripes)
if (!grow_one_stripe(conf, GFP_KERNEL))
if (!grow_one_stripe(conf, GFP_KERNEL)) {
conf->min_nr_stripes = conf->max_nr_stripes;
result = -ENOMEM;
break;
}
mutex_unlock(&conf->cache_size_mutex);

return 0;
return result;
}
EXPORT_SYMBOL(raid5_set_cache_size);

Expand Down

0 comments on commit 483cbbe

Please sign in to comment.