Skip to content

Commit

Permalink
Merge PR ceph#44054 into master
Browse files Browse the repository at this point in the history
* refs/pull/44054/head:
	doc/rados/operations: document pg_num_max
	mgr: set max of 32 pgs for .mgr pool
	mgr/dashboard: expect pg_num_max property for pools
	mon/OSDMonitor: add option --pg-num_max arg for create pool
	mon/OSDMonitor: disallow setting pg_num < min or > max
	mgr/pg_autoscaler: apply pg_num_max
	mon: add pg_num_max pool property

Reviewed-by: Neha Ojha <[email protected]>
Reviewed-by: Patrick Donnelly <[email protected]>
  • Loading branch information
liewegas committed Jan 6, 2022
2 parents 3b7513e + 7ae6216 commit d2265e8
Show file tree
Hide file tree
Showing 11 changed files with 78 additions and 12 deletions.
9 changes: 5 additions & 4 deletions doc/rados/operations/placement-groups.rst
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,14 @@ parallelism client will see when doing IO, even when a pool is mostly
empty. Setting the lower bound prevents Ceph from reducing (or
recommending you reduce) the PG number below the configured number.

You can set the minimum number of PGs for a pool with::
You can set the minimum or maximum number of PGs for a pool with::

ceph osd pool set <pool-name> pg_num_min <num>
ceph osd pool set <pool-name> pg_num_max <num>

You can also specify the minimum PG count at pool creation time with
the optional ``--pg-num-min <num>`` argument to the ``ceph osd pool
create`` command.
You can also specify the minimum or maximum PG count at pool creation
time with the optional ``--pg-num-min <num>`` or ``--pg-num-max
<num>`` arguments to the ``ceph osd pool create`` command.

.. _preselection:

Expand Down
17 changes: 17 additions & 0 deletions qa/workunits/mon/pool_ops.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,23 @@ function expect_config_value()
fi
}

# pg_num min/max: exercise the pg_num_min/pg_num_max pool options and the
# constraints they impose on pg_num changes.
TEST_POOL=testpool1234
# autoscaler off so pg_num stays at the value we set (8)
ceph osd pool create $TEST_POOL 8 --autoscale-mode off
ceph osd pool set $TEST_POOL pg_num_min 2
ceph osd pool get $TEST_POOL pg_num_min | grep 2
ceph osd pool set $TEST_POOL pg_num_max 33
ceph osd pool get $TEST_POOL pg_num_max | grep 33
# min/max must bracket the current pg_num (8)
expect_false ceph osd pool set $TEST_POOL pg_num_min 9
expect_false ceph osd pool set $TEST_POOL pg_num_max 7
# pg_num itself must stay within [pg_num_min, pg_num_max]
expect_false ceph osd pool set $TEST_POOL pg_num 1
expect_false ceph osd pool set $TEST_POOL pg_num 44
# setting either option to 0 clears it, after which 'get' fails
ceph osd pool set $TEST_POOL pg_num_min 0
expect_false ceph osd pool get $TEST_POOL pg_num_min
ceph osd pool set $TEST_POOL pg_num_max 0
expect_false ceph osd pool get $TEST_POOL pg_num_max
ceph osd pool delete $TEST_POOL $TEST_POOL --yes-i-really-really-mean-it

# note: we need to pass the other args or ceph_argparse.py will take
# 'invalid' that is not replicated|erasure and assume it is the next
# argument, which is a string.
Expand Down
5 changes: 3 additions & 2 deletions src/mon/MonCommands.h
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,7 @@ COMMAND("osd pool create "
"name=expected_num_objects,type=CephInt,range=0,req=false "
"name=size,type=CephInt,range=0,req=false "
"name=pg_num_min,type=CephInt,range=0,req=false "
"name=pg_num_max,type=CephInt,range=0,req=false "
"name=autoscale_mode,type=CephChoices,strings=on|off|warn,req=false "
"name=bulk,type=CephBool,req=false "
"name=target_size_bytes,type=CephInt,range=0,req=false "
Expand All @@ -1083,11 +1084,11 @@ COMMAND("osd pool rename "
"rename <srcpool> to <destpool>", "osd", "rw")
COMMAND("osd pool get "
"name=pool,type=CephPoolname "
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk",
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk",
"get pool parameter <var>", "osd", "r")
COMMAND("osd pool set "
"name=pool,type=CephPoolname "
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk "
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|pg_num_max|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size|eio|bulk "
"name=val,type=CephString "
"name=yes_i_really_mean_it,type=CephBool,req=false",
"set pool parameter <var> to <val>", "osd", "rw")
Expand Down
40 changes: 37 additions & 3 deletions src/mon/OSDMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5353,7 +5353,7 @@ namespace {
CSUM_TYPE, CSUM_MAX_BLOCK, CSUM_MIN_BLOCK, FINGERPRINT_ALGORITHM,
PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO,
PG_AUTOSCALE_BIAS, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM,
DEDUP_CDC_CHUNK_SIZE, POOL_EIO, BULK };
DEDUP_CDC_CHUNK_SIZE, POOL_EIO, BULK, PG_NUM_MAX };

std::set<osd_pool_get_choices>
subtract_second_from_first(const std::set<osd_pool_get_choices>& first,
Expand Down Expand Up @@ -6082,6 +6082,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
{"fingerprint_algorithm", FINGERPRINT_ALGORITHM},
{"pg_autoscale_mode", PG_AUTOSCALE_MODE},
{"pg_num_min", PG_NUM_MIN},
{"pg_num_max", PG_NUM_MAX},
{"target_size_bytes", TARGET_SIZE_BYTES},
{"target_size_ratio", TARGET_SIZE_RATIO},
{"pg_autoscale_bias", PG_AUTOSCALE_BIAS},
Expand Down Expand Up @@ -6311,6 +6312,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
case CSUM_MIN_BLOCK:
case FINGERPRINT_ALGORITHM:
case PG_NUM_MIN:
case PG_NUM_MAX:
case TARGET_SIZE_BYTES:
case TARGET_SIZE_RATIO:
case PG_AUTOSCALE_BIAS:
Expand Down Expand Up @@ -6473,6 +6475,7 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
case CSUM_MIN_BLOCK:
case FINGERPRINT_ALGORITHM:
case PG_NUM_MIN:
case PG_NUM_MAX:
case TARGET_SIZE_BYTES:
case TARGET_SIZE_RATIO:
case PG_AUTOSCALE_BIAS:
Expand Down Expand Up @@ -7261,7 +7264,7 @@ int OSDMonitor::prepare_new_pool(MonOpRequestRef op)
bool bulk = false;
int ret = 0;
ret = prepare_new_pool(m->name, m->crush_rule, rule_name,
0, 0, 0, 0, 0, 0.0,
0, 0, 0, 0, 0, 0, 0.0,
erasure_code_profile,
pg_pool_t::TYPE_REPLICATED, 0, FAST_READ_OFF, {}, bulk,
&ss);
Expand Down Expand Up @@ -7863,6 +7866,8 @@ int OSDMonitor::check_pg_num(int64_t pool, int pg_num, int size, int crush_rule,
* @param crush_rule_name The crush rule to use, if crush_rulset <0
* @param pg_num The pg_num to use. If set to 0, will use the system default
* @param pgp_num The pgp_num to use. If set to 0, will use the system default
* @param pg_num_min min pg_num
* @param pg_num_max max pg_num
* @param repl_size Replication factor, or 0 for default
* @param erasure_code_profile The profile name in OSDMap to be used for erasure code
* @param pool_type TYPE_ERASURE, or TYPE_REP
Expand All @@ -7877,6 +7882,7 @@ int OSDMonitor::prepare_new_pool(string& name,
const string &crush_rule_name,
unsigned pg_num, unsigned pgp_num,
unsigned pg_num_min,
unsigned pg_num_max,
const uint64_t repl_size,
const uint64_t target_size_bytes,
const float target_size_ratio,
Expand Down Expand Up @@ -8065,6 +8071,10 @@ int OSDMonitor::prepare_new_pool(string& name,
pg_num_min) {
pi->opts.set(pool_opts_t::PG_NUM_MIN, static_cast<int64_t>(pg_num_min));
}
if (osdmap.require_osd_release >= ceph_release_t::quincy &&
pg_num_max) {
pi->opts.set(pool_opts_t::PG_NUM_MAX, static_cast<int64_t>(pg_num_max));
}
if (auto m = pg_pool_t::get_pg_autoscale_mode_by_name(
pg_autoscale_mode); m != pg_pool_t::pg_autoscale_mode_t::UNKNOWN) {
pi->pg_autoscale_mode = m;
Expand Down Expand Up @@ -8350,6 +8360,19 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
return -EPERM;
}
}
int64_t pg_min = 0, pg_max = 0;
p.opts.get(pool_opts_t::PG_NUM_MIN, &pg_min);
p.opts.get(pool_opts_t::PG_NUM_MAX, &pg_max);
if (pg_min && n < pg_min) {
ss << "specified pg_num " << n
<< " < pg_num_min " << pg_min;
return -EINVAL;
}
if (pg_max && n > pg_max) {
ss << "specified pg_num " << n
       << " > pg_num_max " << pg_max;
return -EINVAL;
}
if (osdmap.require_osd_release < ceph_release_t::nautilus) {
// pre-nautilus osdmap format; increase pg_num directly
assert(n > (int)p.get_pg_num());
Expand Down Expand Up @@ -8737,6 +8760,16 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
<< " > pg_num " << p.get_pg_num_target();
return -EINVAL;
}
} else if (var == "pg_num_max") {
if (interr.length()) {
ss << "error parsing int value '" << val << "': " << interr;
return -EINVAL;
}
if (n && n < (int)p.get_pg_num_target()) {
ss << "specified pg_num_max " << n
<< " < pg_num " << p.get_pg_num_target();
return -EINVAL;
}
} else if (var == "recovery_priority") {
if (interr.length()) {
ss << "error parsing int value '" << val << "': " << interr;
Expand Down Expand Up @@ -12724,6 +12757,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
} else if (prefix == "osd pool create") {
int64_t pg_num = cmd_getval_or<int64_t>(cmdmap, "pg_num", 0);
int64_t pg_num_min = cmd_getval_or<int64_t>(cmdmap, "pg_num_min", 0);
int64_t pg_num_max = cmd_getval_or<int64_t>(cmdmap, "pg_num_max", 0);
int64_t pgp_num = cmd_getval_or<int64_t>(cmdmap, "pgp_num", pg_num);
string pool_type_str;
cmd_getval(cmdmap, "pool_type", pool_type_str);
Expand Down Expand Up @@ -12890,7 +12924,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
err = prepare_new_pool(poolstr,
-1, // default crush rule
rule_name,
pg_num, pgp_num, pg_num_min,
pg_num, pgp_num, pg_num_min, pg_num_max,
repl_size, target_size_bytes, target_size_ratio,
erasure_code_profile, pool_type,
(uint64_t)expected_num_objects,
Expand Down
1 change: 1 addition & 0 deletions src/mon/OSDMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ class OSDMonitor : public PaxosService,
const std::string &crush_rule_name,
unsigned pg_num, unsigned pgp_num,
unsigned pg_num_min,
unsigned pg_num_max,
uint64_t repl_size,
const uint64_t target_size_bytes,
const float target_size_ratio,
Expand Down
2 changes: 2 additions & 0 deletions src/osd/osd_types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1350,6 +1350,8 @@ static opt_mapping_t opt_mapping = boost::assign::map_list_of
pool_opts_t::FINGERPRINT_ALGORITHM, pool_opts_t::STR))
("pg_num_min", pool_opts_t::opt_desc_t(
pool_opts_t::PG_NUM_MIN, pool_opts_t::INT))
("pg_num_max", pool_opts_t::opt_desc_t(
pool_opts_t::PG_NUM_MAX, pool_opts_t::INT))
("target_size_bytes", pool_opts_t::opt_desc_t(
pool_opts_t::TARGET_SIZE_BYTES, pool_opts_t::INT))
("target_size_ratio", pool_opts_t::opt_desc_t(
Expand Down
1 change: 1 addition & 0 deletions src/osd/osd_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1093,6 +1093,7 @@ class pool_opts_t {
CSUM_MIN_BLOCK,
FINGERPRINT_ALGORITHM,
PG_NUM_MIN, // min pg_num
PG_NUM_MAX, // max pg_num
TARGET_SIZE_BYTES, // total bytes in pool
TARGET_SIZE_RATIO, // fraction of total cluster
PG_AUTOSCALE_BIAS,
Expand Down
3 changes: 2 additions & 1 deletion src/pybind/mgr/dashboard/controllers/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@
"expected_num_objects": (int, ""),
"fast_read": (bool, ""),
"options": ({
"pg_num_min": (int, "")
"pg_num_min": (int, ""),
"pg_num_max": (int, "")
}, ""),
"application_metadata": ([str], ""),
"create_time": (str, ""),
Expand Down
4 changes: 4 additions & 0 deletions src/pybind/mgr/dashboard/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6956,11 +6956,15 @@ paths:
options:
description: ''
properties:
pg_num_max:
description: ''
type: integer
pg_num_min:
description: ''
type: integer
required:
- pg_num_min
- pg_num_max
type: object
pg_autoscale_mode:
description: ''
Expand Down
1 change: 1 addition & 0 deletions src/pybind/mgr/mgr_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,7 @@ def create_pool(self, pool: str) -> None:
'pool': pool,
'pg_num': 1,
'pg_num_min': 1,
'pg_num_max': 32,
}
self.check_mon_command(c)

Expand Down
7 changes: 5 additions & 2 deletions src/pybind/mgr/pg_autoscaler/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,8 +493,11 @@ def _calc_final_pg_target(
final_ratio = 1 / (pool_count - root_map[root_id].pool_used)
pool_pg_target = (final_ratio * root_map[root_id].pg_left) / p['size'] * bias

final_pg_target = max(p.get('options', {}).get('pg_num_min', PG_NUM_MIN),
nearest_power_of_two(pool_pg_target))
min_pg = p.get('options', {}).get('pg_num_min', PG_NUM_MIN)
max_pg = p.get('options', {}).get('pg_num_max')
final_pg_target = max(min_pg, nearest_power_of_two(pool_pg_target))
if max_pg and max_pg < final_pg_target:
final_pg_target = max_pg
self.log.info("Pool '{0}' root_id {1} using {2} of space, bias {3}, "
"pg target {4} quantized to {5} (current {6})".format(
p['pool_name'],
Expand Down

0 comments on commit d2265e8

Please sign in to comment.