Skip to content

Commit

Permalink
ceph osd: add support for new op writesame
Browse files Browse the repository at this point in the history
This adds a new ceph request writesame that repeatedly writes a buffer of
writesame.data_length bytes, starting at writesame.offset, until
writesame.length bytes have been written.

This command maps to SCSI's WRITE SAME request, so users like LIO+rbd
can pass this to the OSD. Right now, it only saves having to transfer
writesame.length bytes over the network, but future versions will be
able to fully offload it by passing it directly to the FS/devices if they
support it.

v2:
- Fix tabs/spaces to match coding style.
- Allow zero write length. Check for invalid data lengths.

Signed-off-by: Mike Christie <[email protected]>
Reviewed-by: David Disseldorp <[email protected]>
  • Loading branch information
Mike Christie authored and ddiss committed Apr 25, 2016
1 parent ff18083 commit e334ba9
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/include/rados.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,9 @@ extern const char *ceph_osd_state_name(int s);
f(CACHE_PIN, __CEPH_OSD_OP(WR, DATA, 36), "cache-pin") \
f(CACHE_UNPIN, __CEPH_OSD_OP(WR, DATA, 37), "cache-unpin") \
\
/* ESX/SCSI */ \
f(WRITESAME, __CEPH_OSD_OP(WR, DATA, 38), "write-same") \
\
/** multi **/ \
f(CLONERANGE, __CEPH_OSD_OP(WR, MULTI, 1), "clonerange") \
f(ASSERT_SRC_VERSION, __CEPH_OSD_OP(RD, MULTI, 2), "assert-src-version") \
Expand Down Expand Up @@ -533,6 +536,11 @@ struct ceph_osd_op {
__le64 expected_object_size;
__le64 expected_write_size;
} __attribute__ ((packed)) alloc_hint;
/*
 * Arguments for CEPH_OSD_OP_WRITESAME: the data_length-byte payload sent
 * with the op is written repeatedly, starting at offset, until length
 * bytes have been written.
 */
struct {
__le64 offset;      /* object offset at which the write starts */
__le64 length;      /* total bytes to write; the OSD rejects a value that
                       is not a multiple of data_length */
__le64 data_length; /* size of the pattern payload; must be non-zero and
                       equal to the size of the data sent with the op */
} __attribute__ ((packed)) writesame;
};
__le32 payload_len;
} __attribute__ ((packed));
Expand Down
40 changes: 40 additions & 0 deletions src/osd/ReplicatedPG.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3701,6 +3701,40 @@ int ReplicatedPG::do_xattr_cmp_str(int op, string& v1s, bufferlist& xattr)
}
}

/**
 * Handle CEPH_OSD_OP_WRITESAME by expanding it into an ordinary write.
 *
 * The pattern carried in osd_op.indata (op.writesame.data_length bytes) is
 * repeated until op.writesame.length bytes have been assembled, and the
 * result is submitted through do_osd_ops() as a single CEPH_OSD_OP_WRITE
 * at op.writesame.offset.
 *
 * @param ctx     op context, forwarded unchanged to do_osd_ops()
 * @param osd_op  the writesame op; its indata holds the pattern to repeat
 * @return 0 without writing anything when writesame.length is zero;
 *         -EINVAL when data_length is zero, does not evenly divide length,
 *         or does not match the size of the supplied payload; otherwise
 *         the result of do_osd_ops() for the expanded write.
 */
int ReplicatedPG::do_writesame(OpContext *ctx, OSDOp& osd_op)
{
  ceph_osd_op& op = osd_op.op;
  vector<OSDOp> write_ops(1);
  OSDOp& write_op = write_ops[0];
  uint64_t write_length = op.writesame.length;
  int result = 0;

  // A zero-length writesame is accepted as a no-op.
  if (!write_length)
    return 0;

  // The pattern must be non-empty and tile the requested range exactly.
  if (!op.writesame.data_length || write_length % op.writesame.data_length)
    return -EINVAL;

  if (op.writesame.data_length != osd_op.indata.length()) {
    derr << "invalid length ws data length " << op.writesame.data_length << " actual len " << osd_op.indata.length() << dendl;
    return -EINVAL;
  }

  // NOTE(review): the expanded buffer is sized purely from the
  // client-supplied length and is built before do_osd_ops() sees the
  // write; confirm that length is bounded before this point.
  //
  // Append the payload bufferlist itself instead of going through c_str():
  // c_str() forces indata to be flattened into one contiguous buffer and
  // the (ptr, len) overload copies the bytes again on every iteration.
  while (write_length) {
    write_op.indata.append(osd_op.indata);
    write_length -= op.writesame.data_length;
  }

  write_op.op.op = CEPH_OSD_OP_WRITE;
  write_op.op.extent.offset = op.writesame.offset;
  write_op.op.extent.length = op.writesame.length;
  result = do_osd_ops(ctx, write_ops);
  if (result < 0)
    derr << "do_writesame do_osd_ops failed " << result << dendl;

  return result;
}

// ========================================================================
// low level osd ops

Expand Down Expand Up @@ -5085,6 +5119,12 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
break;

case CEPH_OSD_OP_WRITESAME:
++ctx->num_write;
tracepoint(osd, do_osd_op_pre_writesame, soid.oid.name.c_str(), soid.snap.val, oi.size, op.writesame.offset, op.writesame.length, op.writesame.data_length);
result = do_writesame(ctx, osd_op);
break;

case CEPH_OSD_OP_ROLLBACK :
++ctx->num_write;
tracepoint(osd, do_osd_op_pre_rollback, soid.oid.name.c_str(), soid.snap.val);
Expand Down
2 changes: 2 additions & 0 deletions src/osd/ReplicatedPG.h
Original file line number Diff line number Diff line change
Expand Up @@ -1412,6 +1412,8 @@ class ReplicatedPG : public PG, public PGBackend::Listener {
int do_xattr_cmp_u64(int op, __u64 v1, bufferlist& xattr);
int do_xattr_cmp_str(int op, string& v1s, bufferlist& xattr);

// Expand a CEPH_OSD_OP_WRITESAME op into a regular write of the repeated
// payload. Returns 0 for a zero-length request, -EINVAL for inconsistent
// lengths, otherwise the result of the underlying do_osd_ops() call.
int do_writesame(OpContext *ctx, OSDOp& osd_op);

bool pgls_filter(PGLSFilter *filter, hobject_t& sobj, bufferlist& outdata);
int get_pgls_filter(bufferlist::iterator& iter, PGLSFilter **pfilter);

Expand Down
18 changes: 18 additions & 0 deletions src/tracing/osd.tp
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,24 @@ TRACEPOINT_EVENT(osd, do_osd_op_pre_writefull,
)
)

/*
 * Tracepoint emitted by the CEPH_OSD_OP_WRITESAME case of do_osd_ops()
 * before the op is handed to do_writesame().
 *
 *   oid         - object name
 *   snap        - snapshot id of the object
 *   osize       - object size taken from the object info at the call site
 *   offset      - writesame.offset from the request
 *   length      - writesame.length from the request
 *   data_length - writesame.data_length from the request
 */
TRACEPOINT_EVENT(osd, do_osd_op_pre_writesame,
TP_ARGS(
const char*, oid,
uint64_t, snap,
uint64_t, osize,
uint64_t, offset,
uint64_t, length,
uint64_t, data_length),
TP_FIELDS(
ctf_string(oid, oid)
ctf_integer(uint64_t, snap, snap)
ctf_integer(uint64_t, osize, osize)
ctf_integer(uint64_t, offset, offset)
ctf_integer(uint64_t, length, length)
ctf_integer(uint64_t, data_length, data_length)
)
)

TRACEPOINT_EVENT(osd, do_osd_op_pre_rollback,
TP_ARGS(
const char*, oid,
Expand Down

0 comments on commit e334ba9

Please sign in to comment.