Skip to content

Commit

Permalink
GPUDirect RDMA support
Browse files Browse the repository at this point in the history
  • Loading branch information
Yufei Ren committed Apr 26, 2017
1 parent 10c37df commit 0355385
Show file tree
Hide file tree
Showing 10 changed files with 181 additions and 2 deletions.
3 changes: 3 additions & 0 deletions HOWTO
Original file line number Diff line number Diff line change
Expand Up @@ -1468,6 +1468,9 @@ Buffers and memory
**mmapshared**
Same as mmap, but use a MMAP_SHARED mapping.

**cudamalloc**
Use GPU memory as the buffers for GPUDirect RDMA benchmarks. The ioengine
must be **rdma**.

The area allocated is a function of the maximum allowed bs size for the job,
multiplied by the I/O depth given. Note that for **shmhuge** and
**mmaphuge** to work, the system must have free huge pages allocated. This
Expand Down
24 changes: 22 additions & 2 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ for opt do
;;
--disable-pmem) disable_pmem="yes"
;;
--enable-cuda) enable_cuda="yes"
;;
--help)
show_help="yes"
;;
Expand All @@ -206,14 +208,15 @@ if test "$show_help" = "yes" ; then
echo "--esx Configure build options for esx"
echo "--enable-gfio Enable building of gtk gfio"
echo "--disable-numa Disable libnuma even if found"
echo "--disable-rdma Disable RDMA support even if found"
echo "--disable-rdma Disable RDMA support even if found"
echo "--disable-gfapi Disable gfapi"
echo "--enable-libhdfs Enable hdfs support"
echo "--disable-lex Disable use of lex/yacc for math"
echo "--disable-pmem Disable pmem based engines even if found"
echo "--enable-lex Enable use of lex/yacc for math"
echo "--disable-shm Disable SHM support"
echo "--disable-optimizations Don't enable compiler optimizations"
echo "--enable-cuda Enable GPUDirect RDMA support"
exit $exit_val
fi

Expand Down Expand Up @@ -1990,6 +1993,21 @@ EOF
fi
echo "march_armv8_a_crc_crypto $march_armv8_a_crc_crypto"

##########################################
# cuda probe
# CUDA support is opt-in: the probe only counts as successful when
# --enable-cuda was given AND a program calling cuInit() compiles and links
# against -lcuda. On success -lcuda is prepended to LIBS.
cuda="no"
cat > $TMPC << EOF
#include <cuda.h>
int main(int argc, char **argv)
{
  return cuInit(0);
}
EOF
# Use '=' rather than '==': POSIX test(1) only defines '=' for string
# equality, and strict shells (e.g. dash) reject '=='.
if test "$enable_cuda" = "yes" && compile_prog "" "-lcuda" "cuda"; then
  cuda="yes"
  LIBS="-lcuda $LIBS"
fi
echo "cuda $cuda"

#############################################################################

Expand Down Expand Up @@ -2210,10 +2228,12 @@ fi
if test "$disable_opt" = "yes" ; then
output_sym "CONFIG_DISABLE_OPTIMIZATIONS"
fi

if test "$zlib" = "no" ; then
echo "Consider installing zlib-dev (zlib-devel), some fio features depend on it."
fi
if test "$cuda" = "yes" ; then
output_sym "CONFIG_CUDA"
fi

echo "LIBS+=$LIBS" >> $config_host_mak
echo "GFIO_LIBS+=$GFIO_LIBS" >> $config_host_mak
Expand Down
15 changes: 15 additions & 0 deletions examples/gpudirect-rdmaio-client.fio
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Example gpudirect rdma client job
[global]
ioengine=rdma
hostname=[hostname]
port=[port]
verb=[read/write/send/recv]
mem=cudamalloc
gpu_dev_id=0
bs=1m
size=100g

[sender]
rw=write
iodepth=1
iodepth_batch_complete=1
12 changes: 12 additions & 0 deletions examples/gpudirect-rdmaio-server.fio
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Example gpudirect rdma server job
[global]
ioengine=rdma
port=[port]
mem=cudamalloc
gpu_dev_id=0
bs=1m
size=100g

[receiver]
rw=read
iodepth=16
3 changes: 3 additions & 0 deletions fio.1
Original file line number Diff line number Diff line change
Expand Up @@ -1309,6 +1309,9 @@ Same as \fBmmap\fR, but use huge files as backing.
.TP
.B mmapshared
Same as \fBmmap\fR, but use a MMAP_SHARED mapping.
.TP
.B cudamalloc
Use GPU memory as the buffers for GPUDirect RDMA benchmarks. The ioengine must be \fBrdma\fR.
.RE
.P
The amount of memory allocated is the maximum allowed \fBblocksize\fR for the
Expand Down
16 changes: 16 additions & 0 deletions fio.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@
#define MPOL_LOCAL MPOL_MAX
#endif

#ifdef CONFIG_CUDA
#include <cuda.h>
#endif

/*
* offset generator types
*/
Expand Down Expand Up @@ -408,6 +412,18 @@ struct thread_data {
struct steadystate_data ss;

char verror[FIO_VERROR_SIZE];

#ifdef CONFIG_CUDA
/*
* for GPU memory management
*/
int gpu_dev_cnt;
int gpu_dev_id;
CUdevice cu_dev;
CUcontext cu_ctx;
CUdeviceptr dev_mem_ptr;
#endif

};

/*
Expand Down
10 changes: 10 additions & 0 deletions io_u.c
Original file line number Diff line number Diff line change
Expand Up @@ -1654,6 +1654,10 @@ struct io_u *get_io_u(struct thread_data *td)
populate_verify_io_u(td, io_u);
do_scramble = 0;
}
#ifdef CONFIG_CUDA
if (td->o.mem_type == MEM_CUDA_MALLOC)
do_scramble = 0;
#endif
} else if (io_u->ddir == DDIR_READ) {
/*
* Reset the buf_filled parameters so next time if the
Expand All @@ -1674,8 +1678,10 @@ struct io_u *get_io_u(struct thread_data *td)
if (!td_io_prep(td, io_u)) {
if (!td->o.disable_lat)
fio_gettime(&io_u->start_time, NULL);

if (do_scramble)
small_content_scramble(io_u);

return io_u;
}
err_put:
Expand Down Expand Up @@ -2043,6 +2049,10 @@ void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write,
{
struct thread_options *o = &td->o;

#ifdef CONFIG_CUDA
if (o->mem_type == MEM_CUDA_MALLOC) return;
#endif

if (o->compress_percentage || o->dedupe_percentage) {
unsigned int perc = td->o.compress_percentage;
struct frand_state *rs;
Expand Down
77 changes: 77 additions & 0 deletions memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,75 @@ static void free_mem_malloc(struct thread_data *td)
free(td->orig_buffer);
}

#ifdef CONFIG_CUDA

/*
 * Allocate the I/O buffer area in GPU device memory.
 *
 * Initializes the CUDA driver API, selects the device requested through the
 * gpu_dev_id option, creates a context on it and allocates total_mem bytes
 * of device memory. The device pointer is stored in td->dev_mem_ptr and
 * also published as td->orig_buffer.
 *
 * Returns 0 on success, 1 on any failure (an error is logged).
 *
 * NOTE(review): when cuMemAlloc() fails the context created just before is
 * left in td->cu_ctx; presumably the caller's teardown path releases it via
 * free_mem_cudamalloc() - confirm against the callers of allocate_io_mem().
 */
static int alloc_mem_cudamalloc(struct thread_data *td, size_t total_mem)
{
	CUresult ret;
	char name[128];

	ret = cuInit(0);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: failed initialize cuda driver api\n");
		return 1;
	}

	ret = cuDeviceGetCount(&td->gpu_dev_cnt);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: failed get device count\n");
		return 1;
	}
	dprint(FD_MEM, "found %d GPU devices\n", td->gpu_dev_cnt);

	if (td->gpu_dev_cnt <= 0) {
		log_err("fio: no GPU device found. "
			"Can not perform GPUDirect RDMA.\n");
		return 1;
	}

	/*
	 * Reject an out-of-range device id up front so the user gets a
	 * message naming the offending option value instead of a generic
	 * cuDeviceGet() failure.
	 */
	if (td->o.gpu_dev_id >= (unsigned int) td->gpu_dev_cnt) {
		log_err("fio: gpu_dev_id %u is out of range, "
			"%d GPU device(s) found\n",
			td->o.gpu_dev_id, td->gpu_dev_cnt);
		return 1;
	}

	td->gpu_dev_id = td->o.gpu_dev_id;
	ret = cuDeviceGet(&td->cu_dev, td->gpu_dev_id);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: failed get GPU device\n");
		return 1;
	}

	/*
	 * cuDeviceGetName() takes the CUdevice handle returned by
	 * cuDeviceGet(), not the device ordinal.
	 */
	ret = cuDeviceGetName(name, sizeof(name), td->cu_dev);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: failed get device name\n");
		return 1;
	}
	dprint(FD_MEM, "dev_id = [%d], device name = [%s]\n",
	       td->gpu_dev_id, name);

	ret = cuCtxCreate(&td->cu_ctx, CU_CTX_MAP_HOST, td->cu_dev);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: failed to create cuda context: %d\n", ret);
		return 1;
	}

	ret = cuMemAlloc(&td->dev_mem_ptr, total_mem);
	if (ret != CUDA_SUCCESS) {
		log_err("fio: cuMemAlloc %zu bytes failed\n", total_mem);
		return 1;
	}
	/* Expose the device address through the generic buffer pointer. */
	td->orig_buffer = (void *) td->dev_mem_ptr;

	dprint(FD_MEM, "cudaMalloc %llu %p\n",
	       (unsigned long long) total_mem, td->orig_buffer);
	return 0;
}

/*
 * Release the GPU device memory and the CUDA context recorded in td.
 *
 * Each resource is released only if its handle is set, and the handle is
 * cleared afterwards, so a repeated call does not free the same device
 * memory or destroy the same context twice. Failures are logged; there is
 * no return value.
 *
 * NOTE(review): skipping unset handles assumes td->dev_mem_ptr and
 * td->cu_ctx start out zeroed when allocation never got that far - confirm
 * how struct thread_data is initialized.
 */
static void free_mem_cudamalloc(struct thread_data *td)
{
	if (td->dev_mem_ptr) {
		if (cuMemFree(td->dev_mem_ptr) != CUDA_SUCCESS)
			log_err("fio: failed to free cuda device memory\n");
		td->dev_mem_ptr = 0;
	}

	/* No context was created if allocation failed early: nothing to do. */
	if (td->cu_ctx) {
		if (cuCtxDestroy(td->cu_ctx) != CUDA_SUCCESS)
			log_err("fio: failed to destroy cuda context\n");
		td->cu_ctx = NULL;
	}
}
#endif

/*
* Set up the buffer area we need for io.
*/
Expand Down Expand Up @@ -246,6 +315,10 @@ int allocate_io_mem(struct thread_data *td)
else if (td->o.mem_type == MEM_MMAP || td->o.mem_type == MEM_MMAPHUGE ||
td->o.mem_type == MEM_MMAPSHARED)
ret = alloc_mem_mmap(td, total_mem);
#ifdef CONFIG_CUDA
else if (td->o.mem_type == MEM_CUDA_MALLOC)
ret = alloc_mem_cudamalloc(td, total_mem);
#endif
else {
log_err("fio: bad mem type: %d\n", td->o.mem_type);
ret = 1;
Expand Down Expand Up @@ -275,6 +348,10 @@ void free_io_mem(struct thread_data *td)
else if (td->o.mem_type == MEM_MMAP || td->o.mem_type == MEM_MMAPHUGE ||
td->o.mem_type == MEM_MMAPSHARED)
free_mem_mmap(td, total_mem);
#ifdef CONFIG_CUDA
else if (td->o.mem_type == MEM_CUDA_MALLOC)
free_mem_cudamalloc(td);
#endif
else
log_err("Bad memory type %u\n", td->o.mem_type);

Expand Down
18 changes: 18 additions & 0 deletions options.c
Original file line number Diff line number Diff line change
Expand Up @@ -2603,6 +2603,12 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.oval = MEM_MMAPHUGE,
.help = "Like mmap, but use huge pages",
},
#endif
#ifdef CONFIG_CUDA
{ .ival = "cudamalloc",
.oval = MEM_CUDA_MALLOC,
.help = "Allocate GPU device memory for GPUDirect RDMA",
},
#endif
},
},
Expand Down Expand Up @@ -3562,6 +3568,18 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
.type = FIO_OPT_UNSUPPORTED,
.help = "Build fio with libnuma-dev(el) to enable this option",
},
#endif
#ifdef CONFIG_CUDA
{
.name = "gpu_dev_id",
.lname = "GPU device ID",
.type = FIO_OPT_INT,
.off1 = offsetof(struct thread_options, gpu_dev_id),
.help = "Set GPU device ID for GPUDirect RDMA",
.def = "0",
.category = FIO_OPT_C_GENERAL,
.group = FIO_OPT_G_INVALID,
},
#endif
{
.name = "end_fsync",
Expand Down
5 changes: 5 additions & 0 deletions thread_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ enum fio_memtype {
MEM_MMAP, /* use anonynomous mmap */
MEM_MMAPHUGE, /* memory mapped huge file */
MEM_MMAPSHARED, /* use mmap with shared flag */
#ifdef CONFIG_CUDA
MEM_CUDA_MALLOC,/* use GPU memory */
#endif
};

#define ERROR_STR_MAX 128
Expand Down Expand Up @@ -198,6 +201,8 @@ struct thread_options {
unsigned short numa_mem_mode;
unsigned int numa_mem_prefer_node;
char *numa_memnodes;
unsigned int gpu_dev_id;

unsigned int iolog;
unsigned int rwmixcycle;
unsigned int rwmix[DDIR_RWDIR_CNT];
Expand Down

0 comments on commit 0355385

Please sign in to comment.