Skip to content

Commit

Permalink
DAOS-14494 test: Add a new data cache mode for dfuse. (daos-stack#1…
Browse files Browse the repository at this point in the history
…3172)

Support a new "open to close" cache mode for dfuse data. This mode
allows caching whilst the file is open but does not allow cache re-use
after close. This is similar to disabling caching in terms of impact but
does allow MAP_SHARED to work on dfuse mounts without needing to
set a data cache timeout.

Add a new test for this mode.

Signed-off-by: Ashley Pittman <[email protected]>
  • Loading branch information
ashleypittman authored Feb 22, 2024
1 parent f5e1705 commit cd08bd5
Show file tree
Hide file tree
Showing 10 changed files with 125 additions and 29 deletions.
17 changes: 10 additions & 7 deletions docs/user/filesystem.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ The following features from POSIX are not supported:
* Hard links
* mmap support with MAP\_SHARED will be consistent from single client only and only when data
caching is enabled. Note that this is supported through DFUSE only (i.e. not through the DFS API).
The dfuse-data-cache=otoc container attribute allows this without enabling other caching.
* Char devices, block devices, sockets and pipes
* User/group quotas
* setuid(), setgid() programs, supplementary groups, POSIX ACLs are not supported
Expand Down Expand Up @@ -666,19 +667,21 @@ to be set to 0 or off, except dentry-dir-time which defaults to dentry-time
| dfuse-dentry-time | How long directory entries are cached |
| dfuse-dentry-dir-time | How long dentries are cached, if the entry is itself a directory |
| dfuse-ndentry-time | How long negative dentries are cached |
| dfuse-data-cache | Data caching enabled, duration or ("on"/"true"/"off"/"false") |
| dfuse-data-cache | Data caching enabled, duration or ("on"/"true"/"off"/"false"/"otoc") |
| dfuse-direct-io-disable | Force use of page cache for this container ("on"/"true"/"off"/"false") |

For metadata caching attributes specify the duration that the cache should be
valid for, specified in seconds or with a 's', 'm', 'h' or 'd' suffix for seconds,
minutes, hours or days.

dfuse-data-cache can be set to a time value or "on", "true", "off" or "false". If set, other values
will log an error and result in the cache being off. The O\_DIRECT flag for open files will be
honored with this option enabled. Files which do not set O\_DIRECT will be cached. Data caching
is controlled by dfuse passing a flag to the kernel on open. If data-cache is enabled then it will
be allowed for files if that file is already open, and timeout value will be the duration between
a previous close call which reduced the open count to zero and the next subsequent call to open.
dfuse-data-cache can be set to a time value or "on", "true", "off", "false" or "otoc". If set,
other values will log an error and result in the cache being off. The O\_DIRECT flag for open files
will be honored with this option enabled. Files which do not set O\_DIRECT will be cached. Data
caching is controlled by dfuse passing a flag to the kernel on open. If data-cache is enabled then
it will be allowed for files, and the timeout value will be the duration between a previous close
call which reduced the open count to zero and the next subsequent call to open. A value of "otoc"
will allow the use of the page cache for caching the file whilst it is open, but the cache will only
be used from open to close and will not be saved across opens; this allows the use of MAP\_SHARED on
files.

dfuse-direct-io-disable will enable data caching, similar to dfuse-data-cache,
however if this is enabled then the O\_DIRECT flag will be ignored, and all
Expand Down
1 change: 1 addition & 0 deletions src/client/dfuse/dfuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,7 @@ struct dfuse_cont {
double dfc_dentry_dir_timeout;
double dfc_ndentry_timeout;
double dfc_data_timeout;
bool dfc_data_otoc;
bool dfc_direct_io_disable;
};

Expand Down
4 changes: 4 additions & 0 deletions src/client/dfuse/dfuse_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,10 @@ dfuse_cont_get_cache(struct dfuse_cont *dfc)
have_cache_off = true;
dfc->dfc_data_timeout = 0;
DFUSE_TRA_INFO(dfc, "setting '%s' is disabled", cont_attr_names[i]);
} else if (strncasecmp(buff_addrs[i], "otoc", sizes[i]) == 0) {
dfc->dfc_data_otoc = true;
DFUSE_TRA_INFO(dfc, "setting '%s' is open-to-close",
cont_attr_names[i]);
} else if (dfuse_parse_time(buff_addrs[i], sizes[i], &value) == 0) {
DFUSE_TRA_INFO(dfc, "setting '%s' is %u seconds",
cont_attr_names[i], value);
Expand Down
40 changes: 30 additions & 10 deletions src/client/dfuse/il/int_posix.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2017-2023 Intel Corporation.
* (C) Copyright 2017-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand All @@ -14,6 +14,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/mman.h>

#include <sys/time.h>
#include <sys/resource.h>
Expand Down Expand Up @@ -1756,6 +1757,8 @@ dfuse_mmap(void *address, size_t length, int prot, int flags, int fd,

rc = vector_get(&fd_table, fd, &entry);
if (rc == 0) {
struct stat buf;

DFUSE_LOG_DEBUG("mmap(address=%p, length=%zu, prot=%d, flags=%d,"
" fd=%d, offset=%zd) "
"intercepted, disabling kernel bypass ", address,
Expand All @@ -1767,6 +1770,11 @@ dfuse_mmap(void *address, size_t length, int prot, int flags, int fd,
entry->fd_status = DFUSE_IO_DIS_MMAP;

vector_decref(&fd_table, entry);

/* DAOS-14494: Force the kernel to update the size before mapping. */
rc = fstat(fd, &buf);
if (rc == -1)
return MAP_FAILED;
}

return __real_mmap(address, length, prot, flags, fd, offset);
Expand All @@ -1780,7 +1788,10 @@ dfuse_ftruncate(int fd, off_t length)

rc = vector_get(&fd_table, fd, &entry);
if (rc != 0)
goto do_real_ftruncate;
goto do_real_fn;

if (drop_reference_if_disabled(entry))
goto do_real_fn;

DFUSE_LOG_DEBUG("ftuncate(fd=%d) intercepted, bypass=%s offset %#lx", fd,
bypass_status[entry->fd_status], length);
Expand All @@ -1795,7 +1806,7 @@ dfuse_ftruncate(int fd, off_t length)
errno = rc;
return -1;

do_real_ftruncate:
do_real_fn:
return __real_ftruncate(fd, length);
}

Expand All @@ -1807,14 +1818,17 @@ dfuse_fsync(int fd)

rc = vector_get(&fd_table, fd, &entry);
if (rc != 0)
goto do_real_fsync;
goto do_real_fn;

if (drop_reference_if_disabled(entry))
goto do_real_fn;

DFUSE_LOG_DEBUG("fsync(fd=%d) intercepted, bypass=%s",
fd, bypass_status[entry->fd_status]);

vector_decref(&fd_table, entry);

do_real_fsync:
do_real_fn:
return __real_fsync(fd);
}

Expand All @@ -1826,14 +1840,17 @@ dfuse_fdatasync(int fd)

rc = vector_get(&fd_table, fd, &entry);
if (rc != 0)
goto do_real_fdatasync;
goto do_real_fn;

if (drop_reference_if_disabled(entry))
goto do_real_fn;

DFUSE_LOG_DEBUG("fdatasync(fd=%d) intercepted, bypass=%s",
fd, bypass_status[entry->fd_status]);

vector_decref(&fd_table, entry);

do_real_fdatasync:
do_real_fn:
return __real_fdatasync(fd);
}

Expand Down Expand Up @@ -2909,14 +2926,17 @@ dfuse___fxstat(int ver, int fd, struct stat *buf)

rc = vector_get(&fd_table, fd, &entry);
if (rc != 0)
goto do_real_fstat;
goto do_real_fn;

if (drop_reference_if_disabled(entry))
goto do_real_fn;

/* Turn off this feature if the kernel is doing metadata caching, in this case it's better
* to use the kernel cache and keep it up-to-date than query the servers each time.
*/
if (!entry->fd_fstat) {
vector_decref(&fd_table, entry);
goto do_real_fstat;
goto do_real_fn;
}

counter = atomic_fetch_add_relaxed(&ioil_iog.iog_fstat_count, 1);
Expand Down Expand Up @@ -2959,7 +2979,7 @@ dfuse___fxstat(int ver, int fd, struct stat *buf)
}

return 0;
do_real_fstat:
do_real_fn:
return __real___fxstat(ver, fd, buf);
}

Expand Down
3 changes: 1 addition & 2 deletions src/client/dfuse/ops/create.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,9 @@ dfuse_cb_create(fuse_req_t req, struct dfuse_inode_entry *parent, const char *na

oh->doh_writeable = true;

if (dfs->dfc_data_timeout != 0) {
if (dfs->dfc_data_timeout != 0 || ie->ie_dfs->dfc_data_otoc) {
if (fi->flags & O_DIRECT)
fi_out.direct_io = 1;

} else {
fi_out.direct_io = 1;
}
Expand Down
7 changes: 7 additions & 0 deletions src/client/dfuse/ops/open.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ dfuse_cb_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
} else {
prefetch = true;
}
} else if (ie->ie_dfs->dfc_data_otoc) {
/* Open to close caching, this allows the use of shared mmap */
fi_out.direct_io = 0;
fi_out.keep_cache = 0;

if (fi->flags & O_DIRECT)
fi_out.direct_io = 1;
} else {
fi_out.direct_io = 1;
}
Expand Down
8 changes: 4 additions & 4 deletions src/tests/ftest/daos_test/dfuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ def run_test(self, il_lib=None):
cont_attrs['dfuse-dentry-time'] = cache_time
cont_attrs['dfuse-ndentry-time'] = cache_time
elif cache_mode == 'metadata':
cont_attrs['dfuse-data-cache'] = 'off'
cont_attrs['dfuse-data-cache'] = 'otoc'
cont_attrs['dfuse-attr-time'] = cache_time
cont_attrs['dfuse-dentry-time'] = cache_time
cont_attrs['dfuse-ndentry-time'] = cache_time
elif cache_mode == 'nocache':
cont_attrs['dfuse-data-cache'] = 'off'
elif cache_mode == 'otoc':
cont_attrs['dfuse-data-cache'] = 'otoc'
cont_attrs['dfuse-attr-time'] = '0'
cont_attrs['dfuse-dentry-time'] = '0'
cont_attrs['dfuse-ndentry-time'] = '0'
Expand Down Expand Up @@ -92,7 +92,7 @@ def run_test(self, il_lib=None):
daos_test_env['DD_SUBSYS'] = 'all'
daos_test_env['D_LOG_MASK'] = 'INFO,IL=DEBUG'

command = [self.daos_test, '--test-dir', mount_dir, '--io', '--stream']
command = [self.daos_test, '--test-dir', mount_dir, '--io', '--stream', '--mmap']
if use_dfuse:
command.append('--lowfd')
if cache_mode != 'writeback':
Expand Down
5 changes: 2 additions & 3 deletions src/tests/ftest/daos_test/dfuse.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,9 @@ dfuse: !mux
metadata:
disable_wb_cache: true
name: "metadata"
nocache:
otoc:
disable_wb_cache: true
disable_caching: true
name: "nocache"
name: "otoc"
native:
name: "native"
intercept: !mux
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/util/dfuse_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ def start_dfuse(test, dfuse, pool=None, container=None, **params):
if pool:
params['pool'] = pool.identifier
if container:
params['cont'] = container.uuid
params['cont'] = container.identifier
if params:
dfuse.update_params(**params)

Expand Down
67 changes: 65 additions & 2 deletions src/tests/suite/dfuse_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,15 @@
#include <sys/stat.h>
#include <sys/statfs.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <dirent.h>

#include <dfuse_ioctl.h>

/* Tests can be run by specifying the appropriate argument for a test or all will be run if no test
* is specified.
*/
static const char *all_tests = "ismdl";
static const char *all_tests = "ismdlf";

static void
print_usage()
Expand All @@ -48,6 +49,7 @@ print_usage()
print_message("dfuse_test -m|--metadata\n");
print_message("dfuse_test -d|--directory\n");
print_message("dfuse_test -l|--lowfd\n");
print_message("dfuse_test -f|--mmap\n");
print_message("Default <dfuse_test> runs all tests\n=============\n");
print_message("\n=============================\n");
}
Expand Down Expand Up @@ -509,6 +511,56 @@ do_directory(void **state)
assert_return_code(rc, errno);
}

/*
 * Check that a file under the test directory can be memory mapped.
 *
 * The test creates "file" in test_dir, extends it to 1MiB with ftruncate(), maps it
 * MAP_PRIVATE for writing and fills the mapping, then maps it again MAP_SHARED for reading.
 * Every step is checked, and the file is removed again before returning.
 */
void
do_mmap(void **state)
{
	const size_t map_len = 1024 * 1024;
	int          root;
	int          fd;
	int          rc;
	void        *addr;

	root = open(test_dir, O_PATH | O_DIRECTORY);
	assert_return_code(root, errno);

	/* Always unlink the file but do not check for errors. If running the test manually the
	 * file might pre-exist and affect the behavior.
	 */
	unlinkat(root, "file", 0);

	fd = openat(root, "file", O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
	/* Check the descriptor that was just opened, not the directory descriptor, so that a
	 * failure to create the file is reported here rather than by a later call.
	 */
	assert_return_code(fd, errno);

	/* Give the file a size so that the whole mapped range is backed by the file. */
	rc = ftruncate(fd, map_len);
	assert_return_code(rc, errno);

	addr = mmap(NULL, map_len, PROT_WRITE, MAP_PRIVATE, fd, 0);
	assert_ptr_not_equal(addr, MAP_FAILED);

	printf("Mapped private to %p\n", addr);

	memset(addr, '0', map_len);

	rc = munmap(addr, map_len);
	assert_return_code(rc, errno);

	addr = mmap(NULL, map_len, PROT_READ, MAP_SHARED, fd, 0);
	assert_ptr_not_equal(addr, MAP_FAILED);

	printf("Mapped shared to %p\n", addr);

	rc = munmap(addr, map_len);
	assert_return_code(rc, errno);

	rc = close(fd);
	assert_return_code(rc, errno);

	rc = unlinkat(root, "file", 0);
	assert_return_code(rc, errno);

	rc = close(root);
	assert_return_code(rc, errno);
}

#define MIN_DAOS_FD 10
/*
* Check whether daos network context uses low fds 0~9.
Expand Down Expand Up @@ -629,6 +681,16 @@ run_specified_tests(const char *tests, int *sub_tests, int sub_tests_size)
nr_failed += cmocka_run_group_tests(lowfd_tests, NULL, NULL);
break;

case 'f': {
const struct CMUnitTest mmap_tests[] = {
cmocka_unit_test(do_mmap),
};
printf("\n\n=================");
printf("dfuse mmap tests");
printf("=====================\n");
nr_failed += cmocka_run_group_tests(mmap_tests, NULL, NULL);
break;
}
default:
assert_true(0);
}
Expand All @@ -653,10 +715,11 @@ main(int argc, char **argv)
{"stream", no_argument, NULL, 's'},
{"metadata", no_argument, NULL, 'm'},
{"directory", no_argument, NULL, 'd'},
{"mmap", no_argument, NULL, 'f'},
{"lowfd", no_argument, NULL, 'l'},
{NULL, 0, NULL, 0}};

while ((opt = getopt_long(argc, argv, "aM:imsdl", long_options, &index)) != -1) {
while ((opt = getopt_long(argc, argv, "aM:imsdlf", long_options, &index)) != -1) {
if (strchr(all_tests, opt) != NULL) {
tests[ntests] = opt;
ntests++;
Expand Down

0 comments on commit cd08bd5

Please sign in to comment.