Skip to content

Commit

Permalink
crimson/os/seastore: add nvme_read and nvme_write to be used when end…
Browse files Browse the repository at this point in the history
… to end data protection is enabled

Signed-off-by: Myoungwon Oh <[email protected]>
  • Loading branch information
myoungwon committed Jul 22, 2024
1 parent 71398e2 commit 5f86c84
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 3 deletions.
3 changes: 3 additions & 0 deletions src/crimson/os/seastore/random_block_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ struct rbm_superblock_t {
checksum_t crc = 0;
device_config_t config;
unsigned int shard_num = 0;
// Must be assigned if the end-to-end data protection feature is enabled
uint32_t nvme_block_size = 0;
std::vector<rbm_shard_info_t> shard_infos;

DENC(rbm_superblock_t, v, p) {
Expand All @@ -63,6 +65,7 @@ struct rbm_superblock_t {
denc(v.crc, p);
denc(v.config, p);
denc(v.shard_num, p);
denc(v.nvme_block_size, p);
denc(v.shard_infos, p);
DENC_FINISH(p);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,8 @@ std::ostream &operator<<(std::ostream &out, const rbm_superblock_t &header)
<< ", crc=" << header.crc
<< ", config=" << header.config
<< ", shard_num=" << header.shard_num
<< ", end_to_end_data_protection=" << header.is_end_to_end_data_protection();
<< ", end_to_end_data_protection=" << header.is_end_to_end_data_protection()
<< ", device_block_size=" << header.nvme_block_size;
for (auto p : header.shard_infos) {
out << p;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ write_ertr::future<> NVMeBlockDevice::write(
if (stream >= stream_id_count) {
supported_stream = WRITE_LIFE_NOT_SET;
}
if (is_end_to_end_data_protection()) {
return seastar::do_with(
std::move(bptr),
[this, offset] (auto &bptr) {
return nvme_write(offset, bptr.length(), bptr.c_str());
});
}
return seastar::do_with(
std::move(bptr),
[this, offset, length, supported_stream] (auto& bptr) {
Expand All @@ -159,9 +166,15 @@ read_ertr::future<> NVMeBlockDevice::read(
offset,
bptr.length());
auto length = bptr.length();

if (length == 0) {
return read_ertr::now();
}
assert((length % super.block_size) == 0);

if (is_end_to_end_data_protection()) {
return nvme_read(offset, length, bptr.c_str());
}

return device.dma_read(offset, bptr.c_str(), length).handle_exception(
[](auto e) -> read_ertr::future<size_t> {
logger().error("read: dma_read got error{}", e);
Expand All @@ -188,6 +201,13 @@ write_ertr::future<> NVMeBlockDevice::writev(
if (stream >= stream_id_count) {
supported_stream = WRITE_LIFE_NOT_SET;
}
if (is_end_to_end_data_protection()) {
return seastar::do_with(
std::move(bl),
[this, offset] (auto &bl) {
return nvme_write(offset, bl.length(), bl.c_str());
});
}
bl.rebuild_aligned(super.block_size);

return seastar::do_with(
Expand Down Expand Up @@ -256,6 +276,7 @@ discard_ertr::future<> NVMeBlockDevice::discard(uint64_t offset, uint64_t len) {
nvme_command_ertr::future<nvme_identify_namespace_data_t>
NVMeBlockDevice::identify_namespace(seastar::file f) {
return get_nsid(f).safe_then([this, f](auto nsid) {
namespace_id = nsid;
return seastar::do_with(
nvme_admin_command_t(),
nvme_identify_namespace_data_t(),
Expand Down Expand Up @@ -314,6 +335,7 @@ nvme_command_ertr::future<> NVMeBlockDevice::try_enable_end_to_end_protection()
if (id_namespace_data.lbaf[i].ms ==
nvme_identify_namespace_data_t::METASIZE_FOR_CHECKSUM_OFFLOAD) {
lba_format_index = i;
super.nvme_block_size = (1 << id_namespace_data.lbaf[i].lbads);
break;
}
}
Expand Down Expand Up @@ -362,4 +384,60 @@ nvme_command_ertr::future<> NVMeBlockDevice::initialize_nvme_features() {
return nvme_command_ertr::now();
}

/**
 * nvme_write
 *
 * Issues an NVMe write command through pass_through_io() with the
 * protection-information bits set so the device handles the checksum
 * (checksum offload). Used instead of the regular dma write path when
 * end-to-end data protection is enabled.
 *
 * @param offset      byte offset on the device; must be a multiple of
 *                    super.nvme_block_size
 * @param len         number of bytes to write; must be non-zero and a
 *                    multiple of super.nvme_block_size
 * @param buffer_ptr  source buffer; the caller must keep it alive until the
 *                    returned future resolves (only the raw address is
 *                    stored in the command)
 *
 * A non-zero status from pass_through_io() is logged and aborts the process.
 */
write_ertr::future<> NVMeBlockDevice::nvme_write(
  uint64_t offset, size_t len, void *buffer_ptr) {
  return seastar::do_with(
    nvme_io_command_t(),
    [this, offset, len, buffer_ptr] (auto &cmd) {
    ceph_assert(super.nvme_block_size > 0);
    // The command addresses whole logical blocks. Without these checks a
    // zero/short length makes (len >> lba_shift) - 1 wrap around, and a
    // misaligned offset or length is silently truncated by the shifts below,
    // so the device would write a different range than the caller asked for.
    ceph_assert(len > 0);
    ceph_assert((offset % super.nvme_block_size) == 0);
    ceph_assert((len % super.nvme_block_size) == 0);

    cmd.common.opcode = nvme_io_command_t::OPCODE_WRITE;
    cmd.common.nsid = namespace_id;
    cmd.common.data_len = len;
    // To perform checksum offload, we need to set PRACT to 1 and PRCHK to 4
    // according to the NVMe spec.
    cmd.rw.prinfo_pract = nvme_rw_command_t::PROTECT_INFORMATION_ACTION_ENABLE;
    cmd.rw.prinfo_prchk = nvme_rw_command_t::PROTECT_INFORMATION_CHECK_GUARD;
    cmd.common.addr =
      static_cast<__u64>(reinterpret_cast<uintptr_t>(buffer_ptr));

    // nvme_block_size is assigned as (1 << lbads), so the index of its lowest
    // set bit is the log2 of the block size.
    auto lba_shift = ffsll(super.nvme_block_size) - 1;
    cmd.rw.s_lba = offset >> lba_shift;
    // The field is written as "number of blocks minus one".
    // NOTE(review): nlb is presumably a 16-bit field; a request spanning more
    // than 65536 blocks would be truncated -- confirm callers never exceed it.
    cmd.rw.nlb = (len >> lba_shift) - 1;
    return pass_through_io(cmd
    ).safe_then([] (auto ret) {
      if (ret != 0) {
        logger().error(
          "write nvm command with checksum offload fails : {}", ret);
        ceph_abort();
      }
      return nvme_command_ertr::now();
    });
  });
}

/**
 * nvme_read
 *
 * Issues an NVMe read command through pass_through_io() with the
 * protection-information bits set so the device handles the checksum
 * (checksum offload). Used instead of the regular dma read path when
 * end-to-end data protection is enabled.
 *
 * @param offset      byte offset on the device; must be a multiple of
 *                    super.nvme_block_size
 * @param len         number of bytes to read; must be non-zero and a
 *                    multiple of super.nvme_block_size
 * @param buffer_ptr  destination buffer of at least len bytes; the caller
 *                    must keep it alive until the returned future resolves
 *                    (only the raw address is stored in the command)
 *
 * A non-zero status from pass_through_io() is logged and aborts the process.
 */
read_ertr::future<> NVMeBlockDevice::nvme_read(
  uint64_t offset, size_t len, void *buffer_ptr) {
  return seastar::do_with(
    nvme_io_command_t(),
    [this, offset, len, buffer_ptr] (auto &cmd) {
    ceph_assert(super.nvme_block_size > 0);
    // The command addresses whole logical blocks. Without these checks a
    // zero/short length makes (len >> lba_shift) - 1 wrap around -- the
    // device could then fill far more than the caller's buffer -- and a
    // misaligned offset or length is silently truncated by the shifts below.
    ceph_assert(len > 0);
    ceph_assert((offset % super.nvme_block_size) == 0);
    ceph_assert((len % super.nvme_block_size) == 0);

    cmd.common.opcode = nvme_io_command_t::OPCODE_READ;
    cmd.common.nsid = namespace_id;
    cmd.common.data_len = len;
    // Same protection-information settings as the write path: PRACT enabled
    // and the guard check selected in PRCHK.
    cmd.rw.prinfo_pract = nvme_rw_command_t::PROTECT_INFORMATION_ACTION_ENABLE;
    cmd.rw.prinfo_prchk = nvme_rw_command_t::PROTECT_INFORMATION_CHECK_GUARD;
    cmd.common.addr =
      static_cast<__u64>(reinterpret_cast<uintptr_t>(buffer_ptr));

    // nvme_block_size is assigned as (1 << lbads), so the index of its lowest
    // set bit is the log2 of the block size.
    auto lba_shift = ffsll(super.nvme_block_size) - 1;
    cmd.rw.s_lba = offset >> lba_shift;
    // The field is written as "number of blocks minus one".
    // NOTE(review): nlb is presumably a 16-bit field; a request spanning more
    // than 65536 blocks would be truncated -- confirm callers never exceed it.
    cmd.rw.nlb = (len >> lba_shift) - 1;
    return pass_through_io(cmd
    ).safe_then([] (auto ret) {
      if (ret != 0) {
        logger().error(
          "read nvm command with checksum offload fails : {}", ret);
        ceph_abort();
      }
      return nvme_command_ertr::now();
    });
  });
}

}
14 changes: 13 additions & 1 deletion src/crimson/os/seastore/random_block_manager/nvme_block_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ struct nvme_rw_command_t {
uint32_t dspec : 16;

static const uint32_t DTYPE_STREAM = 1;

static const uint8_t PROTECT_INFORMATION_ACTION_ENABLE = 1;
static const uint8_t PROTECT_INFORMATION_CHECK_GUARD = 4;
static const uint8_t PROTECT_INFORMATION_CHECK_APPLICATION_TAG = 2;
static const uint8_t PROTECT_INFORMATION_CHECK_LOGICAL_REFERENCE_TAG = 1;
};

struct nvme_io_command_t {
Expand All @@ -178,7 +183,7 @@ struct nvme_io_command_t {
nvme_rw_command_t rw;
};
static const uint8_t OPCODE_WRITE = 0x01;
static const uint8_t OPCODE_READ = 0x01;
static const uint8_t OPCODE_READ = 0x02;
};

/*
Expand Down Expand Up @@ -224,6 +229,9 @@ class NVMeBlockDevice : public RBMDevice {
uint64_t offset,
bufferptr &bptr) final;

read_ertr::future<> nvme_read(
uint64_t offset, size_t len, void *buffer_ptr);

close_ertr::future<> close() override;

discard_ertr::future<> discard(
Expand All @@ -241,6 +249,9 @@ class NVMeBlockDevice : public RBMDevice {
ceph::bufferlist bl,
uint16_t stream = 0) final;

write_ertr::future<> nvme_write(
uint64_t offset, size_t len, void *buffer_ptr);

stat_device_ret stat_device() final {
return seastar::file_stat(device_path, seastar::follow_symlink::yes
).handle_exception([](auto e) -> stat_device_ret {
Expand Down Expand Up @@ -376,6 +387,7 @@ class NVMeBlockDevice : public RBMDevice {
uint64_t write_alignment = 4096;
uint32_t atomic_write_unit = 4096;

int namespace_id; // TODO: multi namespaces
std::string device_path;
seastar::sharded<NVMeBlockDevice> shard_devices;
};
Expand Down

0 comments on commit 5f86c84

Please sign in to comment.