Skip to content

Commit

Permalink
DAOS-14511 dfuse: Instruct fuse to invalidate dentries after timeout. (
Browse files Browse the repository at this point in the history
…daos-stack#13065)

Have dfuse instruct the kernel to invalidate old cache-expired inodes.
Create an extra data structure to keep inodes in time order, when inodes
are looked up move them to the end of the list and have a thread checking
the start of the list for expired entries. When entries are expired then after
some additional time call inval_dentry() on the last known dentry which will
cause the kernel to call forget() on the inode and all its children.

This means that over time as timeouts expire the size of the working set will
decrease as will memory consumption of both dfuse and the kernel and as
a result dfuse will close open containers and detach from pools.

Signed-off-by: Ashley Pittman [email protected]
  • Loading branch information
ashleypittman authored Jan 30, 2024
1 parent 9f78878 commit 5ba0569
Show file tree
Hide file tree
Showing 14 changed files with 637 additions and 60 deletions.
1 change: 1 addition & 0 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ IndentCaseLabels: false
ForEachMacros: ['d_list_for_each_entry',
'd_list_for_each_safe',
'd_list_for_each_entry_safe',
'd_list_for_each_entry_reverse',
'evt_ent_array_for_each']
PointerAlignment: Right
AlignTrailingComments: true
Expand Down
3 changes: 2 additions & 1 deletion src/client/dfuse/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ COMMON_SRC = ['dfuse_obj_da.c', 'dfuse_vector.c']
DFUSE_SRC = ['dfuse_core.c',
'dfuse_main.c',
'dfuse_fuseops.c',
'inval.c',
'dfuse_cont.c',
'dfuse_thread.c',
'dfuse_pool.c']
Expand Down Expand Up @@ -190,7 +191,7 @@ def scons():
dfuse_env = env.Clone(LIBS=[])
dfuse_env.AppendUnique(CPPPATH=[Dir('.').srcnode()])
dfuse_env.AppendUnique(CFLAGS=['-pthread'])
dfuse_env.AppendUnique(LIBS=['pthread', 'daos', 'daos_common', 'uuid'])
dfuse_env.AppendUnique(LIBS=['pthread', 'daos', 'daos_common', 'uuid', 'm'])

gcc_env = il_env.Clone(LIBS=[])
gcc_env.AppendUnique(CPPPATH=[Dir('.').srcnode()])
Expand Down
60 changes: 52 additions & 8 deletions src/client/dfuse/dfuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -782,27 +782,28 @@ struct fuse_lowlevel_ops dfuse_ops;
DS_ERROR(-__rc, "fuse_reply_open() error"); \
} while (0)

#define DFUSE_REPLY_CREATE(_ie, req, entry, fi) \
#define DFUSE_REPLY_CREATE(inode, req, entry, fi) \
do { \
int __rc; \
DFUSE_TRA_DEBUG(_ie, "Returning create"); \
_Static_assert(IS_IE(_ie), "Param is not inode entry"); \
(_ie) = NULL; \
__rc = fuse_reply_create(req, &entry, fi); \
DFUSE_TRA_DEBUG(inode, "Returning create"); \
ival_update_inode(inode, (entry).entry_timeout); \
_Static_assert(IS_IE(inode), "Param is not inode entry"); \
(inode) = NULL; \
__rc = fuse_reply_create(req, &entry, fi); \
if (__rc != 0) \
DS_ERROR(-__rc, "fuse_reply_create() error"); \
} while (0)

#define DFUSE_REPLY_ENTRY(inode, req, entry) \
do { \
int __rc; \
DFUSE_TRA_DEBUG(inode, "Returning entry inode %#lx mode %#o size %#zx", \
(entry).attr.st_ino, (entry).attr.st_mode, (entry).attr.st_size); \
if ((entry).attr_timeout > 0) { \
(inode)->ie_stat = (entry).attr; \
dfuse_mcache_set_time(inode); \
} \
DFUSE_TRA_DEBUG(inode, "Returning entry inode %#lx mode %#o size %zi timeout %lf", \
ival_update_inode(inode, (entry).entry_timeout); \
DFUSE_TRA_DEBUG(inode, \
"Returning entry inode %#lx mode %#o size %#zx timeout %lf", \
(entry).attr.st_ino, (entry).attr.st_mode, (entry).attr.st_size, \
(entry).attr_timeout); \
(inode) = NULL; \
Expand All @@ -811,6 +812,19 @@ struct fuse_lowlevel_ops dfuse_ops;
DS_ERROR(-__rc, "fuse_reply_entry() error"); \
} while (0)

/*
 * Reply to a lookup request with a negative entry.
 *
 * Builds a zero-initialised fuse_entry_param in which only entry_timeout is set and passes it
 * to fuse_reply_entry().  All other fields, including the inode number, stay zero.
 *
 * parent   inode entry pointer of the directory that was searched; only used for logging and
 *          set to NULL by this macro so the caller cannot use it after the reply is sent.
 * req      the fuse request being answered.
 * timeout  value stored in entry_timeout (same units as fuse_entry_param.entry_timeout).
 *
 * A non-zero return from fuse_reply_entry() is logged and otherwise ignored.
 */
#define DFUSE_REPLY_NO_ENTRY(parent, req, timeout) \
	do { \
		int                     __rc; \
		/* {0} rather than {} so the macro does not rely on a GNU/C23 extension */ \
		struct fuse_entry_param _entry = {0}; \
		_entry.entry_timeout = (timeout); \
		DFUSE_TRA_DEBUG(parent, "Returning negative entry parent %#lx timeout %lf", \
				(parent)->ie_stat.st_ino, _entry.entry_timeout); \
		(parent) = NULL; \
		__rc = fuse_reply_entry(req, &_entry); \
		if (__rc != 0) \
			DS_ERROR(-__rc, "fuse_reply_entry() error"); \
	} while (0)

#define DFUSE_REPLY_STATFS(_ie, req, stat) \
do { \
int __rc; \
Expand Down Expand Up @@ -884,6 +898,9 @@ struct dfuse_inode_entry {
/* Time of last kernel cache metadata update */
struct timespec ie_mcache_last_update;

/* Time of last kernel cache dentry update */
struct timespec ie_dentry_last_update;

/* Time of last kernel cache data update, also used for kernel readdir caching. */
struct timespec ie_dcache_last_update;

Expand Down Expand Up @@ -920,6 +937,9 @@ struct dfuse_inode_entry {
* Checked on open of a file to determine if pre-caching is used.
*/
ATOMIC bool ie_linear_read;

/* Entry on the evict list */
d_list_t ie_evict_entry;
};

/* Lookup an inode and take a ref on it. */
Expand Down Expand Up @@ -1018,6 +1038,30 @@ dfuse_mcache_evict(struct dfuse_inode_entry *ie);
bool
dfuse_mcache_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout);

/* Check the dentry cache setting against a given timeout, and return time left */
bool
dfuse_dentry_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout);

/* inval.c */

/* Called when a container is opened, after its cache timeouts have been read.
 * Callers treat any non-zero return as a failure and abandon the open.
 * NOTE(review): presumably sets up invalidation buckets for the container's timeouts -
 * confirm against inval.c.
 */
int
ival_add_cont_buckets(struct dfuse_cont *dfc);

/* Called at the start of dfuse_ie_close() before the inode is torn down.
 * NOTE(review): presumably removes the inode from the evict list - confirm against inval.c.
 */
void
ival_drop_inode(struct dfuse_inode_entry *inode);

/* Record that a dentry for this inode has just been returned to the kernel with the given
 * entry timeout.  Invoked from the DFUSE_REPLY_ENTRY/DFUSE_REPLY_CREATE macros, which ignore
 * the return value.
 */
int
ival_update_inode(struct dfuse_inode_entry *inode, double timeout);

/* One-time setup, called from dfuse_fs_init().  Zero on success; the caller converts a
 * non-zero return with d_errno2der(), so the value is presumably an errno.
 */
int
ival_init(struct dfuse_info *dfuse_info);

/* Start the invalidation thread, called from dfuse_launch_fuse().  Zero on success; the
 * caller converts a non-zero return with daos_errno2der(), so the value is presumably an errno.
 */
int
ival_thread_start(struct dfuse_info *dfuse_info);

/* Stop the invalidation thread and drain the invalidation queues.  Called from more than one
 * shutdown/error path.  Declared with (void) so that it is a real prototype and calls with
 * arguments are rejected by the compiler.
 */
void
ival_thread_stop(void);

/* Data caching functions */

/* Mark the data cache as up-to-date from now */
Expand Down
14 changes: 4 additions & 10 deletions src/client/dfuse/dfuse_cont.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,8 @@ dfuse_cont_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char *
* lookups.
*/
if (uuid_parse(name, cont) < 0) {
struct fuse_entry_param entry = {.entry_timeout = 60};

DFUSE_TRA_DEBUG(parent, "Invalid container uuid");
DFUSE_REPLY_ENTRY(parent, req, entry);
DFUSE_REPLY_NO_ENTRY(parent, req, 60);
return;
}

Expand Down Expand Up @@ -97,12 +95,8 @@ dfuse_cont_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char *
decref:
d_hash_rec_decref(&dfp->dfp_cont_table, &dfc->dfs_entry);
err:
if (rc == ENOENT) {
struct fuse_entry_param entry = {0};

entry.entry_timeout = parent->ie_dfs->dfc_ndentry_timeout;
DFUSE_REPLY_ENTRY(parent, req, entry);
} else {
if (rc == ENOENT)
DFUSE_REPLY_NO_ENTRY(parent, req, parent->ie_dfs->dfc_ndentry_timeout);
else
DFUSE_REPLY_ERR_RAW(parent, req, rc);
}
}
63 changes: 59 additions & 4 deletions src/client/dfuse/dfuse_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -723,10 +723,15 @@ dfuse_cont_open_by_label(struct dfuse_info *dfuse_info, struct dfuse_pool *dfp,
} else if (rc != 0) {
D_GOTO(err_close, rc);
}

} else {
DFUSE_TRA_INFO(dfc, "Caching disabled");
}

rc = ival_add_cont_buckets(dfc);
if (rc)
goto err_close;

rc = dfuse_cont_open(dfuse_info, dfp, &c_info.ci_uuid, &dfc);
if (rc) {
D_FREE(dfc);
Expand Down Expand Up @@ -805,9 +810,9 @@ dfuse_cont_open(struct dfuse_info *dfuse_info, struct dfuse_pool *dfp, uuid_t *c
/* Turn on some caching of metadata, otherwise container
* operations will be very frequent
*/
dfc->dfc_attr_timeout = 60;
dfc->dfc_dentry_dir_timeout = 60;
dfc->dfc_ndentry_timeout = 60;
dfc->dfc_attr_timeout = 60 * 5;
dfc->dfc_dentry_dir_timeout = 60 * 5;
dfc->dfc_ndentry_timeout = 60 * 5;

} else if (*_dfc == NULL) {
char str[37];
Expand Down Expand Up @@ -845,9 +850,15 @@ dfuse_cont_open(struct dfuse_info *dfuse_info, struct dfuse_pool *dfp, uuid_t *c
} else if (rc != 0) {
D_GOTO(err_umount, rc);
}

} else {
DFUSE_TRA_INFO(dfc, "Caching disabled");
}

rc = ival_add_cont_buckets(dfc);
if (rc != 0)
goto err_umount;

} else {
/* This is either a container where a label is set on the
* command line, or one created through mkdir, in either case
Expand Down Expand Up @@ -948,6 +959,38 @@ dfuse_mcache_get_valid(struct dfuse_inode_entry *ie, double max_age, double *tim
return use;
}

/*
 * Test whether the dentry timestamp on an inode is younger than max_age.
 *
 * ie       inode entry whose ie_dentry_last_update is examined.
 * max_age  permitted age in seconds; must be non-negative (asserted).
 * timeout  optional; when the entry is still valid and this is non-NULL it receives the
 *          number of seconds remaining.  It is left untouched otherwise.
 *
 * Returns true if there is time remaining, false if the entry has aged out or the timestamp
 * has a zero seconds field (presumably meaning it was never set).
 */
bool
dfuse_dentry_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout)
{
	struct timespec current;
	struct timespec elapsed;
	double          remaining;

	D_ASSERT(max_age != -1);
	D_ASSERT(max_age >= 0);

	if (ie->ie_dentry_last_update.tv_sec == 0)
		return false;

	clock_gettime(CLOCK_MONOTONIC_COARSE, &current);

	/* elapsed = current - last update, borrowing a second if nanoseconds go negative */
	elapsed.tv_sec  = current.tv_sec - ie->ie_dentry_last_update.tv_sec;
	elapsed.tv_nsec = current.tv_nsec - ie->ie_dentry_last_update.tv_nsec;
	if (elapsed.tv_nsec < 0) {
		elapsed.tv_nsec += 1000000000;
		elapsed.tv_sec--;
	}

	remaining = max_age - (elapsed.tv_sec + ((double)elapsed.tv_nsec / 1000000000));
	if (!(remaining > 0))
		return false;

	if (timeout)
		*timeout = remaining;

	return true;
}

/* Set a timer to mark cache entry as valid */
void
dfuse_dcache_set_time(struct dfuse_inode_entry *ie)
Expand Down Expand Up @@ -1029,6 +1072,10 @@ dfuse_fs_init(struct dfuse_info *dfuse_info)
if (rc != 0)
D_GOTO(err_pt, rc);

rc = ival_init(dfuse_info);
if (rc != 0)
D_GOTO(err_it, rc = d_errno2der(rc));

atomic_init(&dfuse_info->di_ino_next, 2);
atomic_init(&dfuse_info->di_eqt_idx, 0);

Expand Down Expand Up @@ -1081,6 +1128,9 @@ dfuse_fs_init(struct dfuse_info *dfuse_info)
sem_destroy(&eqt->de_sem);
DFUSE_TRA_DOWN(eqt);
}

ival_thread_stop();
err_it:
d_hash_table_destroy_inplace(&dfuse_info->dpi_iet, false);
err_pt:
d_hash_table_destroy_inplace(&dfuse_info->di_pool_table, false);
Expand Down Expand Up @@ -1110,7 +1160,7 @@ dfuse_ie_init(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie)
atomic_init(&ie->ie_open_write_count, 0);
atomic_init(&ie->ie_il_count, 0);
atomic_fetch_add_relaxed(&dfuse_info->di_inode_count, 1);

D_INIT_LIST_HEAD(&ie->ie_evict_entry);
D_MUTEX_INIT(&ie->ie_lock, NULL);
}

Expand All @@ -1120,6 +1170,8 @@ dfuse_ie_close(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie)
int rc;
uint32_t ref;

ival_drop_inode(ie);

ref = atomic_load_relaxed(&ie->ie_ref);
DFUSE_TRA_DEBUG(ie, "closing, inode %#lx ref %u, name " DF_DE ", parent %#lx",
ie->ie_stat.st_ino, ref, DP_DE(ie->ie_name), ie->ie_parent);
Expand Down Expand Up @@ -1458,6 +1510,9 @@ dfuse_fs_stop(struct dfuse_info *dfuse_info)
sem_post(&eqt->de_sem);
}

/* Stop and drain invalidation queues */
ival_thread_stop();

for (i = 0; i < dfuse_info->di_eq_count; i++) {
struct dfuse_eq *eqt = &dfuse_info->di_eqt[i];

Expand Down
8 changes: 8 additions & 0 deletions src/client/dfuse/dfuse_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ dfuse_launch_fuse(struct dfuse_info *dfuse_info, struct fuse_args *args)
return -DER_INVAL;
}

rc = ival_thread_start(dfuse_info);
if (rc != 0)
D_GOTO(umount, rc = daos_errno2der(rc));

rc = dfuse_send_to_fg(0);
if (rc != -DER_SUCCESS)
DFUSE_TRA_ERROR(dfuse_info, "Error sending signal to fg: "DF_RC, DP_RC(rc));
Expand All @@ -206,6 +210,8 @@ dfuse_launch_fuse(struct dfuse_info *dfuse_info, struct fuse_args *args)
if (rc != 0)
DHS_ERROR(dfuse_info, rc, "Fuse loop exited");

umount:

fuse_session_unmount(dfuse_info->di_session);

return daos_errno2der(rc);
Expand Down Expand Up @@ -717,6 +723,8 @@ main(int argc, char **argv)
out_pool:
d_hash_rec_decref(&dfuse_info->di_pool_table, &dfp->dfp_entry);
out_daos:
ival_thread_stop();

rc2 = dfuse_fs_fini(dfuse_info);
if (rc == -DER_SUCCESS)
rc = rc2;
Expand Down
14 changes: 4 additions & 10 deletions src/client/dfuse/dfuse_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,8 @@ dfuse_pool_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char *
* lookups.
*/
if (uuid_parse(name, pool) < 0) {
struct fuse_entry_param entry = {.entry_timeout = 60};

DFUSE_TRA_DEBUG(parent, "Invalid pool uuid");
DFUSE_REPLY_ENTRY(parent, req, entry);
DFUSE_REPLY_NO_ENTRY(parent, req, 60);
return;
}

Expand Down Expand Up @@ -134,12 +132,8 @@ dfuse_pool_lookup(fuse_req_t req, struct dfuse_inode_entry *parent, const char *
dfuse_ie_free(dfuse_info, ie);
daos_prop_free(prop);
err:
if (rc == ENOENT) {
struct fuse_entry_param entry = {0};

entry.entry_timeout = parent->ie_dfs->dfc_ndentry_timeout;
DFUSE_REPLY_ENTRY(parent, req, entry);
} else {
if (rc == ENOENT)
DFUSE_REPLY_NO_ENTRY(parent, req, parent->ie_dfs->dfc_ndentry_timeout);
else
DFUSE_REPLY_ERR_RAW(parent, req, rc);
}
}
Loading

0 comments on commit 5ba0569

Please sign in to comment.