Skip to content

Commit

Permalink
exe: add --close-fd to close file descriptors
Browse files Browse the repository at this point in the history
This allows users to specify individual file descriptors or fd ranges
that must be closed prior to executing the spacetime program.

The close semantics are that the file descriptors must be closed after
the setup program executes, but before the spacetime program executes.

Besides ensuring that no file descriptors leak when using bst in scripts
or other automation, this feature also allows the use of file
descriptors to convey out-of-band data to the setup program without
leaking said file descriptor to the spacetime process.
  • Loading branch information
Snaipe committed Mar 31, 2023
1 parent 99838fe commit ddc9036
Show file tree
Hide file tree
Showing 11 changed files with 304 additions and 82 deletions.
88 changes: 88 additions & 0 deletions compat.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@
* in the LICENSE file.
*/

#include "config.h"

#include <dirent.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

#ifdef HAVE_close_range
# include <linux/close_range.h>
#endif

#include "compat.h"

size_t strlcpy(char *restrict dst, const char *restrict src, size_t size)
Expand All @@ -15,3 +30,76 @@ size_t strlcpy(char *restrict dst, const char *restrict src, size_t size)
dst[i] = '\0';
return i;
}

/* parse_fd parses a decimal file descriptor number in the range [0,~0).

   The whole string must be consumed by the number: an empty string or one
   with trailing characters is rejected rather than silently read as 0 or
   as its numeric prefix.

   Returns the parsed descriptor on success. On failure, returns UINT_MAX
   (which is why UINT_MAX itself is outside the accepted range) and sets
   errno to EINVAL for malformed input, or ERANGE for a negative or
   too-large value. */
unsigned int parse_fd(char *optarg)
{
	char *end = NULL;

	errno = 0;
	long val = strtol(optarg, &end, 10);
	if (errno != 0) {
		return UINT_MAX;
	}

	/* No digits at all, or junk after the digits. */
	if (end == optarg || *end != '\0') {
		errno = EINVAL;
		return UINT_MAX;
	}

	/* Compare as unsigned long: casting UINT_MAX to long would yield -1
	   on platforms where long is 32 bits wide, rejecting every value. */
	if (val < 0 || (unsigned long) val >= UINT_MAX) {
		errno = ERANGE;
		return UINT_MAX;
	}

	return (unsigned int) val;
}

/* bst_close_range is like close_range(2), except that it also works on
   Linux versions that are too old for the system call.

   Closes every open file descriptor in the inclusive range [from, to].

   Returns 0 on success. Returns -1 with errno set when close_range(2)
   itself fails for a reason other than ENOSYS, or when the fallback is
   asked for a range where from > to (EINVAL, as the system call does).

   The fallback path ignores flags, and exits the process through err(3)
   if it cannot enumerate or close the descriptors. */
int bst_close_range(unsigned int from, unsigned int to, unsigned int flags)
{
	int rc = -1;
#ifdef HAVE_close_range
	rc = close_range(from, to, flags);
#else
	(void) flags;
	errno = ENOSYS;
#endif

	if (rc != -1 || errno != ENOSYS) {
		return rc;
	}

	/* The system call is not implemented. Fall back to the good old
	   fashioned method.

	   Note that this isn't particularly efficient. bst_close_range is
	   itself called in a loop, which means traversing the list of fds
	   for each invocation. I'm not particularly motivated to optimize
	   this given that the easy answer is to just upgrade your kernel.
	   2023-03-28 -- Snaipe
	*/

	if (from > to) {
		errno = EINVAL;
		return -1;
	}

	DIR *fdlist = opendir("/proc/self/fd");
	if (fdlist == NULL) {
		err(1, "bst_close_range: open /proc/self/fd");
	}

	/* First collect the matching descriptors, and only close them once the
	   directory stream is gone. The stream is backed by a descriptor of its
	   own, which shows up in the listing; closing it halfway through would
	   make readdir fail and leave the remaining descriptors open. */
	unsigned int *fds = NULL;
	size_t nfds = 0;
	size_t capacity = 0;

	for (;;) {
		/* readdir returns NULL both at end-of-directory and on error; only
		   errno tells the two apart. */
		errno = 0;
		struct dirent *dent = readdir(fdlist);
		if (dent == NULL) {
			if (errno != 0) {
				err(1, "bst_close_range: readdir /proc/self/fd");
			}
			break;
		}

		if (dent->d_name[0] == '.') {
			// Either . or ..
			continue;
		}

		unsigned int fd = parse_fd(dent->d_name);
		if (fd == UINT_MAX) {
			err(1, "bst_close_range: %s is not a valid file descriptor number", dent->d_name);
		}

		if (fd < from || fd > to) {
			continue;
		}

		if (nfds == capacity) {
			size_t grown_capacity = capacity != 0 ? capacity * 2 : 64;
			unsigned int *grown = realloc(fds, grown_capacity * sizeof *grown);
			if (grown == NULL) {
				err(1, "bst_close_range: realloc");
			}
			fds = grown;
			capacity = grown_capacity;
		}
		fds[nfds++] = fd;
	}

	closedir(fdlist);

	for (size_t i = 0; i < nfds; ++i) {
		/* Note: close takes a signed int, while close_range takes unsigned
		   ints; descriptors listed by the kernel always fit in an int.

		   EBADF is expected for the descriptor that was backing fdlist,
		   since closedir already released it. close_range(2) likewise
		   skips descriptors that are not open. */
		if (close((int) fds[i]) == -1 && errno != EBADF) {
			err(1, "bst_close_range: close %u", fds[i]);
		}
	}

	free(fds);
	return 0;
}
5 changes: 5 additions & 0 deletions compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@

# include <stddef.h>

/* From the kernel headers */
/* NOTE(review): presumably redefined here so that callers can pass the flag
   even when <linux/close_range.h> is unavailable -- confirm. */
# define BST_CLOSE_RANGE_UNSHARE (1U << 1)

size_t strlcpy(char *restrict dst, const char *restrict src, size_t size);

/* Parses a file descriptor number out of optarg; returns UINT_MAX (with
   errno set) when the value is not acceptable. */
unsigned int parse_fd(char *optarg);

/* Like close_range(2), with a fallback for kernels that do not implement the
   system call. Returns -1 with errno set when the system call fails. */
int bst_close_range(unsigned int from, unsigned int to, unsigned int flags);

#endif /* !COMPAT_H_ */
1 change: 1 addition & 0 deletions config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@
# define VERSION "@version@"

#mesondefine HAVE_SYS_mount_setattr
#mesondefine HAVE_close_range

#endif /* !CONFIG_H_ */
6 changes: 6 additions & 0 deletions enter.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include "bst_limits.h"
#include "capable.h"
#include "compat.h"
#include "enter.h"
#include "errutil.h"
#include "mount.h"
Expand Down Expand Up @@ -818,6 +819,11 @@ int enter(struct entry_settings *opts)
}
const char *init = opts->init + rootlen;

for (const struct close_range *range = opts->close_fds; range < opts->close_fds + opts->nclose_fds; ++range) {
if (bst_close_range(range->from, range->to, BST_CLOSE_RANGE_UNSHARE) == -1) {
err(1, "close_range %d %d", range->from, range->to);
}
}
execve(init, argv, opts->envp);
err(1, "execve %s", init);
}
Expand Down
9 changes: 9 additions & 0 deletions enter.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,18 @@ struct climit {
bool critical;
};

/* close_range is one --close-fd request: the span of file descriptors,
   both ends inclusive, to close before the spacetime program is executed.
   A `to` of ~0 stands for an open-ended range. */
struct close_range {
	int from, to;
};

/* Fixed upper bounds on how many entries of each kind can be configured.
   NOTE(review): presumably each one sizes a matching array in
   struct entry_settings, as MAX_CLOSE_FDS does -- confirm for the others. */
enum {
MAX_MOUNT = 4096,
MAX_NICS = 4096,
MAX_ADDRS = 4096,
MAX_ROUTES = 4096,
MAX_CGROUPS = 4096,
/* Capacity of entry_settings.close_fds: one slot per --close-fd option. */
MAX_CLOSE_FDS = 4096,
};

/* SHARE_WITH_PARENT is a special value for entry_settings.shares[ns]. */
Expand Down Expand Up @@ -99,6 +105,9 @@ struct entry_settings {
bool tty;
struct tty_opts ttyopts;

size_t nclose_fds;
struct close_range close_fds[MAX_CLOSE_FDS];

int no_copy_hard_rlimits;
int no_fake_devtmpfs;
int no_derandomize;
Expand Down
43 changes: 43 additions & 0 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ enum {
OPTION_NO_ENV,
OPTION_NO_COPY_HARD_RLIMITS,
OPTION_TTY,
OPTION_CLOSE_FD,
};

static void process_nslist_entry(const char **out, const char *share, const char *path, int append_nsname)
Expand Down Expand Up @@ -307,6 +308,7 @@ int main(int argc, char *argv[], char *envp[])
{ "ip", required_argument, NULL, OPTION_IP },
{ "route", required_argument, NULL, OPTION_ROUTE },
{ "tty", optional_argument, NULL, OPTION_TTY },
{ "close-fd", optional_argument, NULL, OPTION_CLOSE_FD },

/* Opt-out feature flags */
{ "no-copy-hard-rlimits", no_argument, NULL, OPTION_NO_COPY_HARD_RLIMITS },
Expand Down Expand Up @@ -718,6 +720,47 @@ int main(int argc, char *argv[], char *envp[])
break;
}

case OPTION_CLOSE_FD:
{
if (opts.nclose_fds >= MAX_CLOSE_FDS) {
errx(1, "can only close a maximum of %d fds or fd ranges", MAX_CLOSE_FDS);
}

struct close_range *range = &opts.close_fds[opts.nclose_fds];

/* If --close-fd is specified without parameters, "3-" is assumed */
char *from = "3";
char *to = "";

if (optarg) {
char *end = strchr(optarg, '-');
if (end == NULL) {
/* This is a single fd */
from = optarg;
to = from;
} else {
/* We have an fd range; split it */
*end = '\0';

from = optarg;
to = end + 1;
}
}

if ((range->from = parse_fd(from)) == -1) {
err(2, "close-fd: %s is not a valid file descriptor number", from);
}

if (to[0] == '\0') {
range->to = ~0;
} else if ((range->to = parse_fd(to)) == -1) {
err(2, "close-fd: %s is not a valid file descriptor number", to);
}

opts.nclose_fds++;
break;
}

case 'r':
opts.root = optarg;
break;
Expand Down
15 changes: 15 additions & 0 deletions man/bst.1.scd
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,21 @@ _VAR=value_ before the executable to run.
By default bst inherits the parent's terminal device (or lack thereof). Use
the --tty option to allocate a new pty for the child process.

\--close-fd[=<fd>|<from>-[to]]
Close the specified file descriptor or fd range.

In its first form, _--close-fd=<fd>_ is equivalent to _--close-fd=<fd>-<fd>_.

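In its second form, _from_ designates the first file descriptor to close,
and _to_ designates the last file descriptor to close. All file descriptors
between _from_ and _to_, inclusive, are then closed before _executable_
is started. If _to_ is omitted, every file descriptor from _from_ upward is
closed. If --close-fd is given without an argument, _3-_ is assumed.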

Note that setup programs still have access to the inherited file
descriptors -- that is, file descriptors specified via --close-fd will
only get closed after the setup program finishes executing. This can
be useful to pass out-of-band data to the setup program without leaking
file descriptors to the spacetime process.

\--no-copy-hard-rlimits
Do not copy hard limit values to soft limits for all resources mentioned above.
Expand Down
1 change: 1 addition & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ config.set('libexecdir', libexecdir)
config.set('version', version)

config.set('HAVE_SYS_mount_setattr', cc.has_header_symbol('syscall.h', 'SYS_mount_setattr'))
config.set('HAVE_close_range', cc.has_function('close_range'))

configure_file(input: 'config.h.in', output: 'config.h', configuration: config)

Expand Down
34 changes: 34 additions & 0 deletions test/bst.t
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,37 @@ Testing Environment
FOO=bar
ROOT=/
EXECUTABLE=/bin/true

Testing close-fds

$ echo -n '--close-fd=3 should close fd 3: '
> bst --close-fd=3 --setup='cat 0<&3 && echo -n "setup OK, "' sh <<'EOF' 3</dev/null
> sh -c "cat 0<&3" 2>/dev/null \
> && ( echo "exe KO: fd 3 was open in the spacetime"; exit 1 ) \
> || echo "exe OK"
> EOF
--close-fd=3 should close fd 3: setup OK, exe OK

$ echo -n '--close-fd=3-7 should close fd 7: '
> bst --close-fd=3-7 --setup='cat 0<&7 && echo -n "setup OK, "' sh <<'EOF' 7</dev/null
> sh -c "cat 0<&7" 2>/dev/null \
> && ( echo "exe KO: fd 7 was open in the spacetime"; exit 1 ) \
> || echo "exe OK"
> EOF
--close-fd=3-7 should close fd 7: setup OK, exe OK

$ echo -n '--close-fd=3- should close fd 7: '
> bst --close-fd=3- --setup='cat 0<&7 && echo -n "setup OK, "' sh <<'EOF' 7</dev/null
> sh -c "cat 0<&7" 2>/dev/null \
> && ( echo "exe KO: fd 7 was open in the spacetime"; exit 1 ) \
> || echo "exe OK"
> EOF
--close-fd=3- should close fd 7: setup OK, exe OK

$ echo -n '--close-fd=3-7 should not close fd 8: '
> bst --close-fd=3-7 sh <<'EOF' 8</dev/null
> sh -c "cat 0<&8" 2>/dev/null \
> && echo "OK" \
> || ( echo "KO: fd 8 was closed in the spacetime"; exit 1 )
> EOF
--close-fd=3-7 should not close fd 8: OK
Loading

0 comments on commit ddc9036

Please sign in to comment.