Skip to content

Commit

Permalink
Remove requirement for --cgroup. No need for ownership transfer to bst
Browse files Browse the repository at this point in the history
user anymore.

Co-Authored-By: Franklin "Snaipe" Mathieu <[email protected]>
  • Loading branch information
colindrewes and Snaipe committed Jan 13, 2023
1 parent 9dea2d8 commit 491f60b
Show file tree
Hide file tree
Showing 12 changed files with 308 additions and 237 deletions.
12 changes: 0 additions & 12 deletions cgroups.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,3 @@ void apply_climits(int cgroupfd, const struct climit *limits) {
}
}
}

/*
 * Remove the bst.<rootpid> sub-cgroup directory, looked up relative to the
 * directory file descriptor cleanfd. Intended to be called when bst
 * terminates. Exits with status 1 if the directory cannot be removed.
 */
void cgroup_clean(int cleanfd, pid_t rootpid) {
	char *name = makepath("bst.%d", rootpid);
	int rc = unlinkat(cleanfd, name, AT_REMOVEDIR);

	if (rc != -1) {
		return;
	}
	err(1, "unable to clean cgroup %s", name);
}
1 change: 0 additions & 1 deletion cgroups.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,5 @@ struct climit {
};

void apply_climits(int cgroupfd, const struct climit *limits);
void cgroup_clean(int cleanfd, pid_t rootpid);

#endif /* !CGROUPS_H_ */
99 changes: 58 additions & 41 deletions enter.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,15 +161,54 @@ int enter(struct entry_settings *opts)
errx(1, "cannot create NICs when not in a network namespace");
}

char cgroup_path[PATH_MAX];
/* If there is a cgroup specified we need to create bst.<pid> subcgroup to join
Otherwise if no cgroup is specified but limits are applied bst will error out. */
if (opts->nactiveclimits != 0 || opts->cgroup_path != NULL) {
// If cgroup path is unspecified then attempt to derive from /proc/self/cgroup.
if (opts->cgroup_path == NULL) {
FILE *selfcgroupfd = fopen("/proc/self/cgroup", "r");
if (selfcgroupfd == NULL) {
err(1, "unable to derive current cgroup hierarchy from /proc/self/cgroup");
}

const char *selfcgroup = NULL;
char line[BUFSIZ];
while (fgets(line, sizeof (line), selfcgroupfd) != NULL) {
printf("%s\n", line);
if (strncmp(line, "0::/", sizeof ("0::/") - 1) == 0) {
// Remove newline character read by fgets
line[strcspn(line, "\n")] = '\0';
selfcgroup = line + 3;
printf("found self cgroup %s\n", selfcgroup);
break;
}
}
fclose(selfcgroupfd);

if (selfcgroup != NULL) {
makepath_r(cgroup_path, "/sys/fs/cgroup/%s", selfcgroup);
printf("new cgroup path: %s (from %s and %s)\n", cgroup_path, selfcgroup, line);
}
opts->cgroup_path = cgroup_path;
}

if (opts->nactiveclimits != 0 && opts->cgroup_path == NULL) {
errx(1, "unable to apply limits without --cgroup specified");
}
}

struct outer_helper outer_helper;
outer_helper.persist = opts->persist;
outer_helper.unshare_user = nsactions[NS_USER] == NSACTION_UNSHARE;
memcpy(outer_helper.uid_desired, opts->uid_map, sizeof (outer_helper.uid_desired));
memcpy(outer_helper.gid_desired, opts->gid_map, sizeof (outer_helper.gid_desired));
outer_helper.unshare_net = nsactions[NS_NET] == NSACTION_UNSHARE;
outer_helper.cgroup_enabled = (opts->cgroup_path != NULL);
outer_helper.cgroup_enabled = (opts->nactiveclimits != 0 || opts->cgroup_path != NULL);
outer_helper.nics = opts->nics;
outer_helper.nnics = opts->nnics;
outer_helper.cgroup_path = opts->cgroup_path;
outer_helper.climits = opts->climits;
outer_helper_spawn(&outer_helper);

/* After this point, we must operate with the privilege set of the caller
Expand Down Expand Up @@ -228,53 +267,28 @@ int enter(struct entry_settings *opts)
}
}

pid_t rootpid = getpid();

int cgroupfd;

/*
* If there is a cgroup specified we need to create bst.<pid> subcgroup to join
* Otherwise if no cgroup is specified but limits are applied bst will error out
*/
if (opts->cgroup_path != NULL) {
// Intentionally leave this fd open for cgroup cleanup
cgroupfd = open(opts->cgroup_path, O_PATH | O_DIRECTORY | O_CLOEXEC);
if (cgroupfd == -1) {
err(1, "unable to open cgroup %s", opts->cgroup_path);
}

char *subcgroup = makepath("bst.%d", rootpid);

if (mkdirat(cgroupfd, subcgroup, 0777) == -1) {
err(1, "unable to create sub-hierachy cgroup %s", subcgroup);
}

// Send cleanup cgroup fd to the cleanup process
send_fd(outer_helper.fd, cgroupfd);

int subcgroupfd = openat(cgroupfd, subcgroup, 0);
if (subcgroupfd == -1) {
err(1, "unable to open cgroup %s/%s", opts->cgroup_path, subcgroup);
}

burn(subcgroupfd, "cgroup.procs", "0");

// Cgroup subhierarchy is created, now apply specified limits
apply_climits(subcgroupfd, opts->climits);

close(subcgroupfd);
} else if (opts->nactiveclimits != 0 && opts->cgroup_path == NULL) {
errx(1, "unable to apply limits without --cgroup specified");
}
struct nsid namespaces[] = {
/* User namespace must be entered first and foremost if unprivileged */
{ NS_USER, nsactions[NS_USER] },
{ NS_NET, nsactions[NS_NET] },
{ NS_MNT, nsactions[NS_MNT] },
{ NS_IPC, nsactions[NS_IPC] },
{ NS_PID, nsactions[NS_PID] },
{ NS_UTS, nsactions[NS_UTS] },
{ NS_TIME, nsactions[NS_TIME] },
{ NS_CGROUP, nsactions[NS_CGROUP] },
};

ns_enter(nsactions);
size_t ns_len = lengthof(namespaces);
ns_enter_prefork(namespaces, &ns_len);

/* Some convenience pre-checks */
int mnt_unshare = nsactions[NS_MNT] == NSACTION_UNSHARE;
int uts_unshare = nsactions[NS_UTS] == NSACTION_UNSHARE;
int pid_unshare = nsactions[NS_PID] == NSACTION_UNSHARE;
int cgroup_unshare = nsactions[NS_CGROUP] == NSACTION_UNSHARE;
int net_unshare = nsactions[NS_NET] == NSACTION_UNSHARE;
int time_unshare = nsactions[NS_TIME] == NSACTION_UNSHARE;
int cgroup_unshare = nsactions[NS_CGROUP] == NSACTION_UNSHARE;

/* Just unsharing the mount namespace is not sufficient -- if we don't make
every mount entry private, any change we make will be applied to the
Expand Down Expand Up @@ -454,6 +468,9 @@ int enter(struct entry_settings *opts)
}

outer_helper_sync(&outer_helper);

ns_enter_postfork(namespaces, ns_len);

outer_helper_close(&outer_helper);

int rtnl = init_rtnetlink_socket();
Expand Down
14 changes: 5 additions & 9 deletions man/bst.1.scd
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,9 @@ _VAR=value_ before the executable to run.
Supported options are described in more detail in the *NETWORKING* section.

\--cgroup <path>
Specify the cgroup directory that bst will operate within. bst will create a
cgroup sub-hierarchy at the <path>.

This demands that the bst invoker have r/w access to the cgroup.procs at both
"<path>" and "<path>/.."
Optionally specify the cgroup directory that bst will operate within. bst will create a
cgroup sub-hierarchy at <path>. If not provided, <path> defaults to the
cgroup directory of the current process.

\--limit <resource>=<value>
Apply a cgroup quota <value> to the provided <resource>. Multiple limits can
Expand All @@ -258,10 +256,8 @@ _VAR=value_ before the executable to run.
- pids.max=$MAX

To use a <resource> ensure that the proper controller (io, cpu, memory) is
added to the cgroup.controller at the "<path>" specified by --cgroup. The bst
invoker must also have write access to the given <resource> file and the
ability to write to cgroup.procs at "<path>" specified by --cgroup as well
as the cgroup.procs at "<path>/.."
added to the cgroup.controllers at the current process's cgroup or at the <path>
specified by --cgroup if provided.

\--rlimit <resource>=<value>++
\--rlimit <resource>=[hard]:[soft]
Expand Down
84 changes: 52 additions & 32 deletions ns.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,6 @@ void opts_to_nsactions(const char *shares[], enum nsaction *nsactions)
}
}

/* Pairs one namespace with the action to carry out for it. */
struct nsid {
/* Namespace type; used to index flags[] and passed to ns_name(). */
int ns;
/* Action to take for this namespace; ns_enter_one() switches on it. */
enum nsaction action;
};

static int is_setns(const struct nsid *ns)
{
switch (ns->action) {
Expand All @@ -118,39 +113,16 @@ static int cmp_nsids(const void *lhs, const void *rhs)
return (int) ((intptr_t) lhs - (intptr_t) rhs);
}

void ns_enter(enum nsaction *nsactions)
static void ns_enter_one(struct nsid *ns)
{
/* Enter all relevant namespaces. It's hard to check in advance which
namespaces are supported, so we unshare them one by one in order. */

struct nsid namespaces[] = {
/* User namespace must be entered first and foremost. */
{ NS_USER, nsactions[NS_USER] },
{ NS_NET, nsactions[NS_NET] },
{ NS_MNT, nsactions[NS_MNT] },
{ NS_IPC, nsactions[NS_IPC] },
{ NS_PID, nsactions[NS_PID] },
{ NS_CGROUP, nsactions[NS_CGROUP] },
{ NS_UTS, nsactions[NS_UTS] },
{ NS_TIME, nsactions[NS_TIME] },
};

/* If we have CAP_SYS_ADMIN from the get-go, starting by entering
the userns may restrict us from joining additional namespaces, so
we rearrange the order so that we setns into target nsfs files first. */
if (capable(BST_CAP_SYS_ADMIN)) {
qsort(namespaces, lengthof(namespaces), sizeof (namespaces[0]),
cmp_nsids);
}

for (struct nsid *ns = &namespaces[0]; ns < namespaces + lengthof(namespaces); ++ns) {
switch (ns->action) {
switch (ns->action) {
case NSACTION_UNSHARE:
if (unshare(flags[ns->ns].flag) == -1) {
if (errno == EINVAL) {
/* We realized that the namespace isn't supported -- remove it
from the unshare set. */
nsactions[ns->ns] = NSACTION_SHARE_WITH_PARENT;
//nsactions[ns->ns] = NSACTION_SHARE_WITH_PARENT;
ns->action = NSACTION_SHARE_WITH_PARENT;
} else {
err(1, "unshare %s", flags[ns->ns].proc_ns_name);
}
Expand All @@ -165,6 +137,54 @@ void ns_enter(enum nsaction *nsactions)
err(1, "setns %s", flags[ns->ns].proc_ns_name);
}
break;
}
}

/* Tell whether a namespace may only be entered after forking. For now this
   is only the case for the cgroup namespace. */
static bool is_postfork_ns(struct nsid *ns)
{
	switch (ns->ns) {
	case NS_CGROUP:
		return true;
	default:
		return false;
	}
}

/*
 * Enter every namespace that can be entered before forking, and hand the
 * remaining ones back to the caller so that they can be entered later with
 * ns_enter_postfork.
 *
 * namespaces: array of *len entries; reordered and compacted in place.
 * len:        on entry, the number of entries in namespaces; on return, the
 *             number of entries left at the front of namespaces that were
 *             deferred.
 *
 * Exits with an error if a namespace that actually needs entering pre-fork
 * is ordered after a namespace that can only be entered post-fork.
 */
void ns_enter_prefork(struct nsid *namespaces, size_t *len)
{
	/* Enter all relevant namespaces. It's hard to check in advance which
	   namespaces are supported, so we unshare them one by one in order. */

	/* If we have CAP_SYS_ADMIN from the get-go, starting by entering
	   the userns may restrict us from joining additional namespaces, so
	   we rearrange the order so that we setns into target nsfs files first. */
	if (capable(BST_CAP_SYS_ADMIN)) {
		qsort(namespaces, *len, sizeof (namespaces[0]),
				cmp_nsids);
	}

	struct nsid *const end = namespaces + *len;

	/* Enter namespaces in order, stopping at the first one that has to wait
	   until after the fork. */
	struct nsid *ns = namespaces;
	for (; ns < end; ++ns) {
		if (ns->action != NSACTION_SHARE_WITH_PARENT && is_postfork_ns(ns)) {
			break;
		}
		ns_enter_one(ns);
	}

	if (ns == end) {
		/* Everything was entered; nothing is deferred. */
		*len = 0;
		return;
	}

	/* Keep the namespace type by value: the compaction below writes to the
	   front of the array and could otherwise clobber the entry we would
	   name in the error message. */
	const int first_postfork = ns->ns;

	size_t kept = 0;
	for (; ns < end; ++ns) {
		/* Entries shared with the parent need no entering at all, so they
		   cannot conflict with the post-fork ordering; only entries with
		   real work to do are rejected. They are still carried over, just
		   as the loop above still passes them to ns_enter_one. */
		bool needs_entering = ns->action != NSACTION_SHARE_WITH_PARENT;

		if (needs_entering && !is_postfork_ns(ns)) {
			errx(1, "incompatible options: %s namespace must be entered before "
					"forking, but must be done after %s namespace is entered post-fork.",
					ns_name(ns->ns),
					ns_name(first_postfork));
		}
		namespaces[kept++] = *ns;
	}
	*len = kept;
}

/* Enter, in array order, each of the len namespaces in namespaces. Meant
   for the entries that ns_enter_prefork left to be entered after the fork. */
void ns_enter_postfork(struct nsid *namespaces, size_t len)
{
	for (size_t i = 0; i < len; ++i) {
		ns_enter_one(&namespaces[i]);
	}
}
8 changes: 7 additions & 1 deletion ns.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,16 @@ enum nsaction {
NSACTION_UNSHARE = -2,
};

/* Pairs one namespace with the action to carry out for it. Arrays of these
   are handed to ns_enter_prefork() and ns_enter_postfork(). */
struct nsid {
/* Namespace type; presumably an enum nstype value, since it is passed to
   ns_name(). */
int ns;
/* Action to take for this namespace. */
enum nsaction action;
};

const char *ns_name(enum nstype);
int ns_cloneflag(enum nstype);

void opts_to_nsactions(const char *shares[], enum nsaction *nsactions);
void ns_enter(enum nsaction *nsactions);
void ns_enter_prefork(struct nsid *namespaces, size_t *len);
void ns_enter_postfork(struct nsid *namespaces, size_t len);

#endif /* !NS_H_ */
Loading

0 comments on commit 491f60b

Please sign in to comment.