Skip to content

Commit

Permalink
osd,test: remove cdc_window_size entirely
Browse files Browse the repository at this point in the history
To avoid miscalculations caused by boundary mismatches,
we prefer to read the whole object.

Signed-off-by: Myoungwon Oh <[email protected]>
  • Loading branch information
myoungwon committed Nov 19, 2020
1 parent 9e59ee8 commit cee688c
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 106 deletions.
4 changes: 2 additions & 2 deletions src/mon/MonCommands.h
Original file line number Diff line number Diff line change
Expand Up @@ -1090,11 +1090,11 @@ COMMAND("osd pool rename "
"rename <srcpool> to <destpool>", "osd", "rw")
COMMAND("osd pool get "
"name=pool,type=CephPoolname "
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_window_size|dedup_cdc_chunk_size",
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size",
"get pool parameter <var>", "osd", "r")
COMMAND("osd pool set "
"name=pool,type=CephPoolname "
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_window_size|dedup_cdc_chunk_size "
"name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size "
"name=val,type=CephString "
"name=yes_i_really_mean_it,type=CephBool,req=false",
"set pool parameter <var> to <val>", "osd", "rw")
Expand Down
10 changes: 1 addition & 9 deletions src/mon/OSDMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5373,7 +5373,7 @@ namespace {
COMPRESSION_MAX_BLOB_SIZE, COMPRESSION_MIN_BLOB_SIZE,
CSUM_TYPE, CSUM_MAX_BLOCK, CSUM_MIN_BLOCK, FINGERPRINT_ALGORITHM,
PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO,
PG_AUTOSCALE_BIAS, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM, DEDUP_CDC_WINDOW_SIZE,
PG_AUTOSCALE_BIAS, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM,
DEDUP_CDC_CHUNK_SIZE };

std::set<osd_pool_get_choices>
Expand Down Expand Up @@ -6111,7 +6111,6 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
{"pg_autoscale_bias", PG_AUTOSCALE_BIAS},
{"dedup_tier", DEDUP_TIER},
{"dedup_chunk_algorithm", DEDUP_CHUNK_ALGORITHM},
{"dedup_cdc_window_size", DEDUP_CDC_WINDOW_SIZE},
{"dedup_cdc_chunk_size", DEDUP_CDC_CHUNK_SIZE},
};

Expand Down Expand Up @@ -6331,7 +6330,6 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
case PG_AUTOSCALE_BIAS:
case DEDUP_TIER:
case DEDUP_CHUNK_ALGORITHM:
case DEDUP_CDC_WINDOW_SIZE:
case DEDUP_CDC_CHUNK_SIZE:
pool_opts_t::key_t key = pool_opts_t::get_opt_desc(i->first).key;
if (p->opts.is_set(key)) {
Expand Down Expand Up @@ -6492,7 +6490,6 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
case PG_AUTOSCALE_BIAS:
case DEDUP_TIER:
case DEDUP_CHUNK_ALGORITHM:
case DEDUP_CDC_WINDOW_SIZE:
case DEDUP_CDC_CHUNK_SIZE:
for (i = ALL_CHOICES.begin(); i != ALL_CHOICES.end(); ++i) {
if (i->second == *it)
Expand Down Expand Up @@ -8691,11 +8688,6 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap,
return -EINVAL;
}
}
} else if (var == "dedup_cdc_window_size") {
if (interr.length()) {
ss << "error parsing int value '" << val << "': " << interr;
return -EINVAL;
}
} else if (var == "dedup_cdc_chunk_size") {
if (interr.length()) {
ss << "error parsing int value '" << val << "': " << interr;
Expand Down
99 changes: 44 additions & 55 deletions src/osd/PrimaryLogPG.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10025,7 +10025,6 @@ struct C_Flush : public Context {
int PrimaryLogPG::start_dedup(OpRequestRef op, ObjectContextRef obc)
{
bufferlist bl;
uint64_t cur_off = 0;
const object_info_t& oi = obc->obs.oi;
const hobject_t& soid = oi.soid;

Expand All @@ -10045,49 +10044,45 @@ int PrimaryLogPG::start_dedup(OpRequestRef op, ObjectContextRef obc)
*/
ManifestOpRef mop(std::make_shared<ManifestOp>(nullptr, 0));

while (cur_off < oi.size) {
// cdc
std::map<uint64_t, bufferlist> chunks;
int r = do_cdc(oi, cur_off, mop->new_manifest.chunk_map, chunks);
if (r < 0) {
return r;
}
if (!chunks.size()) {
break;
}
// cdc
std::map<uint64_t, bufferlist> chunks;
int r = do_cdc(oi, mop->new_manifest.chunk_map, chunks);
if (r < 0) {
return r;
}
if (!chunks.size()) {
return 0;
}

// chunks issued here are different with chunk_map newly generated
// because the same chunks in previous snap will not be issued
// So, we need two data structures; the first is the issued chunk list to track
// issued operations, and the second is the new chunk_map to update chunk_map after
// all operations are finished
object_ref_delta_t refs;
ObjectContextRef obc_l, obc_g;
get_adjacent_clones(obc, obc_l, obc_g);
// skip if the same content exits in prev snap at same offset
mop->new_manifest.calc_refs_to_inc_on_set(
obc_l ? &(obc_l->obs.oi.manifest) : nullptr,
obc_g ? &(obc_g->obs.oi.manifest) : nullptr,
refs,
chunks.begin()->first); // to avoid unnecessary search
// chunks issued here are different with chunk_map newly generated
// because the same chunks in previous snap will not be issued
// So, we need two data structures; the first is the issued chunk list to track
// issued operations, and the second is the new chunk_map to update chunk_map after
// all operations are finished
object_ref_delta_t refs;
ObjectContextRef obc_l, obc_g;
get_adjacent_clones(obc, obc_l, obc_g);
// skip if the same content exits in prev snap at same offset
mop->new_manifest.calc_refs_to_inc_on_set(
obc_l ? &(obc_l->obs.oi.manifest) : nullptr,
obc_g ? &(obc_g->obs.oi.manifest) : nullptr,
refs);

for (auto p : chunks) {
hobject_t target = mop->new_manifest.chunk_map[p.first].oid;
if (refs.find(target) == refs.end()) {
continue;
}
C_SetDedupChunks *fin = new C_SetDedupChunks(this, soid, get_last_peering_reset(), p.first);
ceph_tid_t tid = refcount_manifest(soid, target, refcount_t::CREATE_OR_GET_REF,
fin, move(chunks[p.first]));
mop->chunks[target] = make_pair(p.first, p.second.length());
mop->num_chunks++;
mop->tids[p.first] = tid;
fin->tid = tid;
dout(10) << __func__ << " oid: " << soid << " tid: " << tid
<< " target: " << target << " offset: " << p.first
<< " length: " << p.second.length() << dendl;
for (auto p : chunks) {
hobject_t target = mop->new_manifest.chunk_map[p.first].oid;
if (refs.find(target) == refs.end()) {
continue;
}
cur_off += r;
C_SetDedupChunks *fin = new C_SetDedupChunks(this, soid, get_last_peering_reset(), p.first);
ceph_tid_t tid = refcount_manifest(soid, target, refcount_t::CREATE_OR_GET_REF,
fin, move(chunks[p.first]));
mop->chunks[target] = make_pair(p.first, p.second.length());
mop->num_chunks++;
mop->tids[p.first] = tid;
fin->tid = tid;
dout(10) << __func__ << " oid: " << soid << " tid: " << tid
<< " target: " << target << " offset: " << p.first
<< " length: " << p.second.length() << dendl;
}

if (mop->tids.size()) {
Expand All @@ -10101,14 +10096,12 @@ int PrimaryLogPG::start_dedup(OpRequestRef op, ObjectContextRef obc)
return -EINPROGRESS;
}

int PrimaryLogPG::do_cdc(const object_info_t& oi, uint64_t off,
int PrimaryLogPG::do_cdc(const object_info_t& oi,
std::map<uint64_t, chunk_info_t>& chunk_map,
std::map<uint64_t, bufferlist>& chunks)
{
uint64_t cur_off = off;
string chunk_algo = pool.info.get_dedup_chunk_algorithm_name();
int64_t chunk_size = pool.info.get_dedup_cdc_chunk_size();
uint64_t max_window_size = static_cast<uint64_t>(pool.info.get_dedup_cdc_window_size());
uint64_t total_length = 0;

std::unique_ptr<CDC> cdc = CDC::create(chunk_algo, cbits(chunk_size)-1);
Expand All @@ -10117,9 +10110,6 @@ int PrimaryLogPG::do_cdc(const object_info_t& oi, uint64_t off,
return -EINVAL;
}

if (cur_off >= oi.size) {
return -ERANGE;
}
bufferlist bl;
/**
* We disable EC pool as a base tier of distributed dedup.
Expand All @@ -10128,10 +10118,10 @@ int PrimaryLogPG::do_cdc(const object_info_t& oi, uint64_t off,
* As s result, we leave this as a future work.
*/
int r = pgbackend->objects_read_sync(
oi.soid, cur_off, max_window_size, 0, &bl);
oi.soid, 0, oi.size, 0, &bl);
if (r < 0) {
dout(0) << __func__ << " read fail " << " offset: " << cur_off
<< " len: " << max_window_size << " r: " << r << dendl;
dout(0) << __func__ << " read fail " << oi.soid
<< " len: " << oi.size << " r: " << r << dendl;
return r;
}
if (bl.length() == 0) {
Expand All @@ -10140,20 +10130,19 @@ int PrimaryLogPG::do_cdc(const object_info_t& oi, uint64_t off,
}

dout(10) << __func__ << " oid: " << oi.soid << " len: " << bl.length()
<< " oi.size: " << oi.size << " window_size: " << max_window_size
<< " chunk_size: " << chunk_size << dendl;
<< " oi.size: " << oi.size
<< " chunk_size: " << chunk_size << dendl;

vector<pair<uint64_t, uint64_t>> cdc_chunks;
cdc->calc_chunks(bl, &cdc_chunks);

// get fingerprint
for (auto p : cdc_chunks) {
bufferlist chunk;
uint64_t object_offset = off + p.first;
chunk.substr_of(bl, p.first, p.second);
hobject_t target = get_fpoid_from_chunk(oi.soid, chunk);
chunks[object_offset] = move(chunk);
chunk_map[object_offset] = chunk_info_t(0, p.second, target);
chunks[p.first] = move(chunk);
chunk_map[p.first] = chunk_info_t(0, p.second, target);
total_length += p.second;
}
return total_length;
Expand Down
2 changes: 1 addition & 1 deletion src/osd/PrimaryLogPG.h
Original file line number Diff line number Diff line change
Expand Up @@ -1512,7 +1512,7 @@ class PrimaryLogPG : public PG, public PGBackend::Listener {
ObjectContextRef& _l, ObjectContextRef& _g);
bool inc_refcount_by_set(OpContext* ctx, object_manifest_t& tgt,
OSDOp& osd_op);
int do_cdc(const object_info_t& oi, uint64_t off, std::map<uint64_t, chunk_info_t>& chunk_map,
int do_cdc(const object_info_t& oi, std::map<uint64_t, chunk_info_t>& chunk_map,
std::map<uint64_t, bufferlist>& chunks);
int start_dedup(OpRequestRef op, ObjectContextRef obc);
hobject_t get_fpoid_from_chunk(const hobject_t soid, bufferlist& chunk);
Expand Down
9 changes: 3 additions & 6 deletions src/osd/osd_types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1360,9 +1360,7 @@ static opt_mapping_t opt_mapping = boost::assign::map_list_of
("dedup_chunk_algorithm", pool_opts_t::opt_desc_t(
pool_opts_t::DEDUP_CHUNK_ALGORITHM, pool_opts_t::STR))
("dedup_cdc_chunk_size", pool_opts_t::opt_desc_t(
pool_opts_t::DEDUP_CDC_CHUNK_SIZE, pool_opts_t::INT))
("dedup_cdc_window_size", pool_opts_t::opt_desc_t(
pool_opts_t::DEDUP_CDC_WINDOW_SIZE, pool_opts_t::INT));
pool_opts_t::DEDUP_CDC_CHUNK_SIZE, pool_opts_t::INT));

bool pool_opts_t::is_opt_name(const std::string& name)
{
Expand Down Expand Up @@ -5928,11 +5926,10 @@ std::ostream& operator<<(std::ostream& out, const object_ref_delta_t & ci)
void object_manifest_t::calc_refs_to_inc_on_set(
const object_manifest_t* _g,
const object_manifest_t* _l,
object_ref_delta_t &refs,
uint64_t start) const
object_ref_delta_t &refs) const
{
/* avoid to increment the same reference on adjacent clones */
auto iter = start ? chunk_map.find(start) : chunk_map.begin();
auto iter = chunk_map.begin();
auto find_chunk = [](decltype(iter) &i, const object_manifest_t* cur)
-> bool {
if (cur) {
Expand Down
9 changes: 1 addition & 8 deletions src/osd/osd_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1062,7 +1062,6 @@ class pool_opts_t {
READ_LEASE_INTERVAL,
DEDUP_TIER,
DEDUP_CHUNK_ALGORITHM,
DEDUP_CDC_WINDOW_SIZE,
DEDUP_CDC_CHUNK_SIZE,
};

Expand Down Expand Up @@ -1594,11 +1593,6 @@ struct pg_pool_t {
opts.get(pool_opts_t::DEDUP_CDC_CHUNK_SIZE, &chunk_size);
return chunk_size;
}
int64_t get_dedup_cdc_window_size() const {
int64_t window_size;
opts.get(pool_opts_t::DEDUP_CDC_WINDOW_SIZE, &window_size);
return window_size;
}

/// application -> key/value metadata
std::map<std::string, std::map<std::string, std::string>> application_metadata;
Expand Down Expand Up @@ -5687,8 +5681,7 @@ struct object_manifest_t {
void calc_refs_to_inc_on_set(
const object_manifest_t* g, ///< [in] manifest for clone > *this
const object_manifest_t* l, ///< [in] manifest for clone < *this
object_ref_delta_t &delta, ///< [out] set of refs to drop
uint64_t start = 0 ///< [in] start position
object_ref_delta_t &delta ///< [out] set of refs to drop
) const;

/**
Expand Down
27 changes: 2 additions & 25 deletions src/test/librados/tier_cxx.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4664,9 +4664,6 @@ TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) {
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_window_size", 8192),
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
inbl, NULL, NULL));
Expand Down Expand Up @@ -4739,9 +4736,6 @@ TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) {
ASSERT_EQ(test_bl[1], chunk[1]);
}

ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_window_size", 512),
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 512),
inbl, NULL, NULL));
Expand Down Expand Up @@ -4769,11 +4763,9 @@ TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) {

cdc = CDC::create("fastcdc", cbits(512)-1);
chunks.clear();
bufferlist chunk_target;
chunk_target.substr_of(gbl, 512, 512); // calculate based on window size
cdc->calc_chunks(chunk_target, &chunks);
cdc->calc_chunks(gbl, &chunks);
bufferlist chunk_512;
chunk_512.substr_of(gbl, chunks[0].first + 512, chunks[0].second);
chunk_512.substr_of(gbl, chunks[3].first, chunks[3].second);
{
unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
Expand All @@ -4792,9 +4784,6 @@ TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) {
ASSERT_EQ(test_bl[1], chunk_512[1]);
}

ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_window_size", 16384),
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 16384),
inbl, NULL, NULL));
Expand Down Expand Up @@ -4843,9 +4832,6 @@ TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) {
}

// less than object size
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_window_size", 4096),
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
inbl, NULL, NULL));
Expand Down Expand Up @@ -4933,9 +4919,6 @@ TEST_F(LibRadosTwoPoolsPP, ManifestFlushSnap) {
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_window_size", 8192),
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
inbl, NULL, NULL));
Expand Down Expand Up @@ -5118,9 +5101,6 @@ TEST_F(LibRadosTwoPoolsPP, ManifestFlushDupCount) {
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_window_size", 8192),
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
inbl, NULL, NULL));
Expand Down Expand Up @@ -5344,9 +5324,6 @@ TEST_F(LibRadosTwoPoolsPP, TierFlushDuringFlush) {
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_window_size", 8192),
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
inbl, NULL, NULL));
Expand Down

0 comments on commit cee688c

Please sign in to comment.