diff --git a/enter.c b/enter.c index b9f4c30..1c1657b 100644 --- a/enter.c +++ b/enter.c @@ -31,6 +31,7 @@ #include "ns.h" #include "outer.h" #include "path.h" +#include "tty.h" #include "setarch.h" #include "sig.h" #include "util.h" @@ -208,6 +209,11 @@ int enter(struct entry_settings *opts) cleaner and easier to always fork, regardless of unsharing the PID namespace. */ + int parentSock = -1, childSock = -1; + if (opts->tty) { + tty_setup_socketpair(&parentSock, &childSock); + } + pid_t pid = fork(); if (pid == -1) { if (errno == ENOMEM && nsactions[NS_PID] >= 0) { @@ -217,6 +223,9 @@ int enter(struct entry_settings *opts) } if (pid) { + if (childSock >= 0) { + close(childSock); + } if (opts->pidfile != NULL) { int pidfile = open(opts->pidfile, O_WRONLY | O_CREAT | O_CLOEXEC | O_NOCTTY , 0666); if (pidfile == -1) { @@ -260,14 +269,24 @@ int enter(struct entry_settings *opts) sigset_t mask; sigfillset(&mask); + if (parentSock >= 0) { + tty_parent_setup(parentSock); + } + for (;;) { + int waitflags = WEXITED | WNOHANG; siginfo_t info; - sig_wait(&mask, &info); - sig_forward(&info, pid); - - if (info.si_signo != SIGCHLD) { - continue; + if (parentSock < 0) { + sig_wait(&mask, &info); + sig_forward(&info, pid); + if (info.si_signo != SIGCHLD) { + continue; + } + } else { + if (!tty_parent_select(pid, &waitflags)) { + continue; + } } /* We might have been run as a process subreaper against our @@ -275,7 +294,7 @@ int enter(struct entry_settings *opts) exited. 
*/ int rc; - while ((rc = waitid(P_ALL, 0, &info, WEXITED | WNOHANG)) != -1) { + while ((rc = waitid(P_ALL, 0, &info, waitflags)) != -1) { if (info.si_signo != SIGCHLD) { break; } @@ -308,6 +327,10 @@ int enter(struct entry_settings *opts) } } + if (parentSock >= 0) { + close(parentSock); + } + sigset_t mask; sigemptyset(&mask); @@ -575,6 +598,10 @@ int enter(struct entry_settings *opts) } } + if (childSock >= 0) { + tty_child(childSock); + } + if (opts->init != NULL && opts->init[0] != '\0') { if (!pid_unshare && prctl(PR_SET_CHILD_SUBREAPER, 1) == -1) { diff --git a/enter.h b/enter.h index 814a7b5..cfe8c94 100644 --- a/enter.h +++ b/enter.h @@ -83,6 +83,8 @@ struct entry_settings { const char *pidfile; + bool tty; + int no_copy_hard_limits; int no_fake_devtmpfs; int no_derandomize; diff --git a/main.c b/main.c index 715b9ac..ff5bbd4 100644 --- a/main.c +++ b/main.c @@ -62,6 +62,7 @@ enum { OPTION_NO_INIT, OPTION_NO_ENV, OPTION_NO_COPY_HARD_LIMITS, + OPTION_TTY, }; static void process_nslist_entry(const char **out, const char *share, const char *path, int append_nsname) @@ -284,6 +285,7 @@ int main(int argc, char *argv[], char *envp[]) { "pidfile", required_argument, NULL, OPTION_PIDFILE }, { "ip", required_argument, NULL, OPTION_IP }, { "route", required_argument, NULL, OPTION_ROUTE }, + { "tty", no_argument, NULL, OPTION_TTY }, /* Opt-out feature flags */ { "no-copy-hard-limits", no_argument, NULL, OPTION_NO_COPY_HARD_LIMITS }, @@ -656,6 +658,10 @@ int main(int argc, char *argv[], char *envp[]) opts.no_env = 1; break; + case OPTION_TTY: + opts.tty = 1; + break; + case 'r': opts.root = optarg; break; diff --git a/man/bst.1.scd b/man/bst.1.scd index 0962c3e..57ce4ff 100644 --- a/man/bst.1.scd +++ b/man/bst.1.scd @@ -328,6 +328,12 @@ _VAR=value_ before the executable to run. the spacetime process. This can cause issues for environment variables like PATH if the rootfs layout is different than the host. +\--tty + Allocate a pty for the process. 
+ + By default bst inherits the parent's terminal device (or lack thereof). Use + the --tty option to allocate a new pty for the child process. + # PRIVILEGE MODEL *bst* strives to be runnable by unprivileged users. However, some code paths diff --git a/meson.build b/meson.build index 155adf9..be4d1a9 100644 --- a/meson.build +++ b/meson.build @@ -87,6 +87,7 @@ bst_sources = [ 'setarch.c', 'sig.c', 'timens.c', + 'tty.c', 'usage.c', 'userns.c', ] diff --git a/test/tty.t b/test/tty.t new file mode 100644 index 0000000..0e2f766 --- /dev/null +++ b/test/tty.t @@ -0,0 +1,5 @@ +#!/usr/bin/env cram.sh + +Allocate a PTY for the spacetime + $ bst --tty --mount devpts,/dev/pts,devpts,mode=620,ptmxmode=666 tty + /dev/pts/0 diff --git a/tty.c b/tty.c new file mode 100644 index 0000000..ec3868f --- /dev/null +++ b/tty.c @@ -0,0 +1,292 @@ +/* Copyright © 2021 Arista Networks, Inc. All rights reserved. + * + * Use of this source code is governed by the MIT license that can be found + * in the LICENSE file. 
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "sig.h"
+#include "tty.h"
+
+/* Size of the buffer used to relay bytes between stdio and the pty. */
+enum { TTY_BUFLEN = 1024 };
+
+/* Receive a single file descriptor sent with send_fd() over the UNIX
+   socket `socket` and store it in *pFd. Any failure terminates the
+   process via err()/errx(). */
+static void recv_fd(int socket, int *pFd) {
+	char buf[1];
+	struct iovec iov[1] = {
+		[0] = {.iov_base = buf, .iov_len = 1 }
+	};
+	union {
+		struct cmsghdr _align;
+		char ctrl[CMSG_SPACE(sizeof(int))];
+	} uCtrl;
+	struct msghdr msg = {
+		.msg_control = uCtrl.ctrl,
+		.msg_controllen = sizeof(uCtrl.ctrl),
+		.msg_name = NULL,
+		.msg_namelen = 0,
+		.msg_iov = iov,
+		.msg_iovlen = 1,
+	};
+
+	ssize_t nrecv = recvmsg(socket, &msg, 0);
+	if (nrecv < 0) {
+		err(1, "recv_fd: recvmsg");
+	}
+	if (nrecv == 0) {
+		// errno is not set on end-of-file, so err() would report a
+		// stale, unrelated error string here.
+		errx(1, "recv_fd: recvmsg: peer closed the socket");
+	}
+
+	struct cmsghdr *pCm = CMSG_FIRSTHDR(&msg);
+	if (pCm == NULL || pCm->cmsg_len != CMSG_LEN(sizeof(int))) {
+		errx(1, "recv_fd: no descriptor passed");
+	}
+	if (pCm->cmsg_level != SOL_SOCKET) {
+		errx(1, "recv_fd: control level != SOL_SOCKET");
+	}
+	if (pCm->cmsg_type != SCM_RIGHTS) {
+		errx(1, "recv_fd: control type != SCM_RIGHTS");
+	}
+	// Copy the payload out instead of dereferencing a cast pointer; the
+	// ancillary data area carries no alignment guarantee for int.
+	memcpy(pFd, CMSG_DATA(pCm), sizeof(int));
+}
+
+/* Send the file descriptor `fd` over the UNIX socket `socket` as
+   SCM_RIGHTS ancillary data, together with one dummy payload byte.
+   Terminates the process via err() if sendmsg fails. */
+static void send_fd(int socket, int fd) {
+	char buf[1] = {0};
+	struct iovec iov[1] = {
+		[0] = {.iov_base = buf, .iov_len = 1 }
+	};
+	union {
+		struct cmsghdr _align;
+		char ctrl[CMSG_SPACE(sizeof(int))];
+	} uCtrl;
+	// Zero the whole control buffer so no uninitialized padding bytes
+	// are handed to the kernel.
+	memset(&uCtrl, 0, sizeof(uCtrl));
+	struct msghdr msg = {
+		.msg_control = uCtrl.ctrl,
+		.msg_controllen = sizeof(uCtrl.ctrl),
+		.msg_name = NULL,
+		.msg_namelen = 0,
+		.msg_iov = iov,
+		.msg_iovlen = 1,
+	};
+	struct cmsghdr *pCm = CMSG_FIRSTHDR(&msg);
+	pCm->cmsg_len = CMSG_LEN(sizeof(int));
+	pCm->cmsg_level = SOL_SOCKET;
+	pCm->cmsg_type = SCM_RIGHTS;
+	memcpy(CMSG_DATA(pCm), &fd, sizeof(int));
+	if (sendmsg(socket, &msg, 0) < 0) {
+		err(1, "send_fd: sendmsg");
+	}
+}
+
+/* Write all `len` bytes of `buf` to `fd`, retrying after short writes
+   and EINTR. Returns 0 on success, -1 with errno set on failure. */
+static int write_all(int fd, const char *buf, size_t len) {
+	while (len > 0) {
+		ssize_t nwritten = write(fd, buf, len);
+		if (nwritten < 0) {
+			if (errno == EINTR) {
+				continue;
+			}
+			return -1;
+		}
+		if (nwritten == 0) {
+			// No progress is possible; bail out rather than spin.
+			errno = EIO;
+			return -1;
+		}
+		buf += nwritten;
+		len -= (size_t) nwritten;
+	}
+	return 0;
+}
+
+/* State of the parent side of the pty relay. */
+static struct tty_parent_info_s {
+	int termfd;          // pty master received from the child, or -1
+	int sigfd;           // signalfd set up by tty_parent_setup, or -1
+	fd_set rfds, wfds;   // descriptors still watched for read / write
+	int nfds;            // highest watched descriptor + 1
+	struct termios orig; // stdin settings saved before going raw
+	bool origSaved;      // true once `orig` holds a valid saved state
+} info = {
+	.termfd = -1,
+	.sigfd = -1,
+};
+
+/* Create the connected pair of UNIX stream sockets used to hand the pty
+   master from the child to the parent. Both ends are close-on-exec.
+   Terminates the process via err() on failure. */
+void tty_setup_socketpair(int *pParentSock, int *pChildSock) {
+	int socks[2];
+	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socks) < 0) {
+		err(1, "tty_setup: socketpair");
+	}
+	*pParentSock = socks[0];
+	*pChildSock = socks[1];
+}
+
+/* atexit handler: close the pty master and put the parent's stdin back
+   into the state that was saved by tty_parent_setup. */
+static void tty_parent_cleanup(void) {
+	if (info.termfd >= 0) {
+		close(info.termfd);
+	}
+	// Only restore a state that was actually captured; otherwise `orig`
+	// is all zeroes.
+	if (info.origSaved) {
+		tcsetattr(STDIN_FILENO, TCSADRAIN, &info.orig);
+	}
+}
+
+/* Handle a signal locally in the parent instead of forwarding it.
+   Returns true if the signal was consumed here, false if the caller
+   should forward it to the child.
+
+   Only SIGWINCH is handled: the window size of the parent's stdin is
+   copied onto the pty. */
+static bool tty_handle_sig(siginfo_t *siginfo) {
+	// Nothing to resize unless we hold a pty master and stdin is a tty.
+	if (info.termfd < 0) {
+		return false;
+	}
+	if (!isatty(STDIN_FILENO)) {
+		return false;
+	}
+
+	switch (siginfo->si_signo) {
+	case SIGWINCH: {
+		struct winsize wsize;
+		if (ioctl(STDIN_FILENO, TIOCGWINSZ, &wsize) < 0) {
+			err(1, "reading window size");
+		}
+		if (ioctl(info.termfd, TIOCSWINSZ, &wsize) < 0) {
+			err(1, "writing window size");
+		}
+		return true;
+	}
+	default:
+		return false;
+	}
+}
+
+/* Run one iteration of the parent's relay loop: wait for activity, then
+   copy stdin to the pty, copy the pty to stdout, and process one signal
+   from the signalfd (handled locally or forwarded to `pid`).
+
+   Returns true when the caller should go on to reap children, i.e. when
+   SIGCHLD was read or the pty reached EOF / a read error; returns false
+   when the caller should simply call again.
+
+   Once the pty is no longer being read, WNOHANG is cleared from
+   *pwaitflags. */
+bool tty_parent_select(pid_t pid, int *pwaitflags) {
+	char buf[TTY_BUFLEN];
+	fd_set readFds = info.rfds, writeFds = info.wfds;
+	bool rtn = false;
+
+	// Pass a NULL mask so the signals blocked in tty_parent_setup stay
+	// blocked while waiting. An empty mask would unblock them for the
+	// duration of the call and let them take their default dispositions
+	// instead of being queued for the signalfd.
+	int rc = pselect(info.nfds, &readFds, NULL, NULL, NULL, NULL);
+	if (rc == 0) {
+		return false;
+	}
+	if (rc < 0) {
+		if (errno == EINTR) {
+			return false;
+		}
+		err(1, "pselect");
+	}
+
+	// Non-blocking poll for writability of the relay destinations.
+	struct timespec immediate = {0};
+	if (pselect(info.nfds, NULL, &writeFds, NULL, &immediate, NULL) <= 0) {
+		return false;
+	}
+
+	if (FD_ISSET(STDIN_FILENO, &readFds) && FD_ISSET(info.termfd, &writeFds)) {
+		ssize_t nread = read(STDIN_FILENO, buf, sizeof(buf));
+		if (nread > 0) {
+			if (write_all(info.termfd, buf, (size_t) nread) < 0) {
+				warn("writing to terminal");
+			}
+		} else {
+			if (nread < 0) {
+				warn("reading from stdin");
+			}
+			// Stop watching stdin and signal end-of-input to the
+			// pty with an EOT (^D) character.
+			FD_CLR(STDIN_FILENO, &info.rfds);
+			if (write(info.termfd, &(char){4}, 1) < 0) {
+				warn("writing EOT to terminal");
+			}
+		}
+	}
+
+	if (FD_ISSET(info.termfd, &readFds) && FD_ISSET(STDOUT_FILENO, &writeFds)) {
+		ssize_t nread = read(info.termfd, buf, sizeof(buf));
+		if (nread > 0) {
+			if (write_all(STDOUT_FILENO, buf, (size_t) nread) < 0) {
+				warn("writing to stdout");
+			}
+		} else {
+			// EIO is not reported; any other read error is.
+			if (nread < 0 && errno != EIO) {
+				warn("reading from terminal");
+			}
+			FD_CLR(info.termfd, &info.rfds);
+			rtn = true;
+		}
+	}
+
+	if (FD_ISSET(info.sigfd, &readFds)) {
+		struct signalfd_siginfo sigfd_info;
+		if (read(info.sigfd, &sigfd_info, sizeof(sigfd_info)) == sizeof(sigfd_info)) {
+			// Zero the structure so the fields we do not fill in
+			// are not left indeterminate for sig_forward.
+			siginfo_t siginfo;
+			memset(&siginfo, 0, sizeof(siginfo));
+			siginfo.si_signo = (int) sigfd_info.ssi_signo;
+			siginfo.si_code = sigfd_info.ssi_code;
+			if (!tty_handle_sig(&siginfo)) {
+				sig_forward(&siginfo, pid);
+			}
+			// Do not overwrite a `true` already set by terminal
+			// EOF above just because this signal is not SIGCHLD.
+			if (sigfd_info.ssi_signo == SIGCHLD) {
+				rtn = true;
+			}
+		}
+	}
+
+	if (!FD_ISSET(info.termfd, &info.rfds)) {
+		*pwaitflags &= ~WNOHANG;
+	}
+	return rtn;
+}
+
+/* Prepare the parent side of the relay: switch stdin to raw mode (if it
+   is a tty), receive the pty master from the child over `socket`, block
+   all signals and route them through a signalfd, and initialise the
+   descriptor sets used by tty_parent_select. Any failure terminates the
+   process via err()/errx(). */
+void tty_parent_setup(int socket) {
+	// Put the parent's stdin in raw mode, except add CRLF handling.
+	struct termios tios;
+	if (isatty(STDIN_FILENO)) {
+		if (tcgetattr(STDIN_FILENO, &tios) < 0) {
+			err(1, "tty_parent: tcgetattr");
+		}
+		info.orig = tios;
+		info.origSaved = true;
+		cfmakeraw(&tios);
+		// keep c_oflag the same
+		tios.c_oflag = info.orig.c_oflag;
+		if (tcsetattr(STDIN_FILENO, TCSANOW, &tios) < 0) {
+			err(1, "tty_parent: tcsetattr");
+		}
+	}
+	atexit(tty_parent_cleanup);
+
+	// Wait for the child to create the pty pair and pass the master back.
+	// Turn off CRLF handling since that gives us ^Ms in output.
+	recv_fd(socket, &info.termfd);
+	if (tcgetattr(info.termfd, &tios) < 0) {
+		err(1, "tty_parent: tcgetattr");
+	}
+	tios.c_oflag &= ~OPOST;
+	if (tcsetattr(info.termfd, TCSAFLUSH, &tios) < 0) {
+		err(1, "tty_parent: tcsetattr");
+	}
+
+	// Block every signal and receive them through a signalfd instead, so
+	// they can be multiplexed with terminal I/O in tty_parent_select.
+	sigset_t sigmask;
+	sigfillset(&sigmask);
+	if (sigprocmask(SIG_BLOCK, &sigmask, NULL) < 0) {
+		err(1, "tty_parent: sigprocmask");
+	}
+	if ((info.sigfd = signalfd(-1, &sigmask, 0)) < 0) {
+		err(1, "tty_parent: signalfd");
+	}
+
+	// fd_set can only represent descriptors below FD_SETSIZE.
+	if (info.termfd >= FD_SETSIZE || info.sigfd >= FD_SETSIZE) {
+		errx(1, "tty_parent: file descriptor too large for select");
+	}
+
+	FD_ZERO(&info.rfds);
+	FD_ZERO(&info.wfds);
+	FD_SET(STDIN_FILENO, &info.rfds);
+	FD_SET(info.termfd, &info.rfds);
+	FD_SET(info.sigfd, &info.rfds);
+	FD_SET(STDOUT_FILENO, &info.wfds);
+	FD_SET(info.termfd, &info.wfds);
+	if (info.sigfd > info.termfd) {
+		info.nfds = info.sigfd + 1;
+	} else {
+		info.nfds = info.termfd + 1;
+	}
+}
+
+/* Child side: allocate a pty from /dev/pts/ptmx, send its master to the
+   parent over `socket`, start a new session with the pty slave as
+   controlling terminal, and make the slave the child's stdin, stdout
+   and stderr. Any failure other than setsid() terminates the process
+   via err(). */
+void tty_child(int socket) {
+	int mfd = open("/dev/pts/ptmx", O_RDWR);
+	if (mfd < 0) {
+		err(1, "tty_child: open ptmx");
+	}
+	int unlock = 0;
+	if (ioctl(mfd, TIOCSPTLCK, &unlock) < 0) {
+		err(1, "tty_child: ioctl(TIOCSPTLCK)");
+	}
+	int sfd = ioctl(mfd, TIOCGPTPEER, O_RDWR);
+	if (sfd < 0) {
+		err(1, "tty_child: ioctl(TIOCGPTPEER)");
+	}
+	// The parent relays through the master; the child only keeps the
+	// slave end.
+	send_fd(socket, mfd);
+	close(mfd);
+
+	// Best effort: the result is deliberately not checked; a problem
+	// here surfaces through the TIOCSCTTY check below.
+	(void) setsid();
+	if (ioctl(sfd, TIOCSCTTY, NULL) < 0) {
+		err(1, "tty_child: ioctl(TIOCSCTTY)");
+	}
+	if (dup2(sfd, STDIN_FILENO) < 0) {
+		err(1, "tty_child: dup2(stdin)");
+	}
+	if (dup2(sfd, STDOUT_FILENO) < 0) {
+		err(1, "tty_child: dup2(stdout)");
+	}
+	if (dup2(sfd, STDERR_FILENO) < 0) {
+		err(1, "tty_child: dup2(stderr)");
+	}
+	if (sfd > STDERR_FILENO) {
+		close(sfd);
+	}
+}
diff --git a/tty.h b/tty.h
new file mode 100644
index 0000000..68c658d
--- /dev/null
+++ b/tty.h
@@ -0,0 +1,18 @@
+/* Copyright © 2021 Arista Networks, Inc. All rights reserved.
+ *
+ * Use of this source code is governed by the MIT license that can be found
+ * in the LICENSE file.
+ */ + +#ifndef TTY_H_ +# define TTY_H_ + +# include +# include + +void tty_setup_socketpair(int *pParentSock, int *pChildSock); +void tty_parent_setup(int fd); +bool tty_parent_select(pid_t pid, int *pwaitflags); +void tty_child(int fd); + +#endif /* !TTY_H */ diff --git a/usage.txt b/usage.txt index 63e9cc6..81036e8 100644 --- a/usage.txt +++ b/usage.txt @@ -55,3 +55,4 @@ Options: namespaces. --no-env Clear the environment before running the executable. + --tty Allocate a pty for the process.