Skip to content

Commit

Permalink
coredump: Don't perform any cleanups before dumping core
Browse files Browse the repository at this point in the history
Rename coredump_exit_mm to coredump_task_exit and call it from do_exit
before PTRACE_EVENT_EXIT, and before any cleanup work for a task
happens.  This ensures that an accurate copy of the process can be
captured in the coredump as no cleanup for the process happens before
the coredump completes.  This also ensures that PTRACE_EVENT_EXIT
will not be visited by any thread until the coredump is complete.

Add a new flag PF_POSTCOREDUMP so that tasks that have passed through
coredump_task_exit can be recognized and ignored in zap_process.

Now that all of the coredumping happens before exit_mm remove code to
test for a coredump in progress from mm_release.

Replace "may_ptrace_stop()" with a simple test of "current->ptrace".
The other tests in may_ptrace_stop all concern avoiding stopping
during a coredump.  These tests are no longer necessary as it is now
guaranteed that fatal_signal_pending will be set if the code enters
ptrace_stop during a coredump.  The code in ptrace_stop is guaranteed
not to stop if fatal_signal_pending returns true.

Until this change "ptrace_event(PTRACE_EVENT_EXIT)" could call
ptrace_stop without fatal_signal_pending being true, as signals are
dequeued in get_signal before calling do_exit.  This is no longer
an issue as "ptrace_event(PTRACE_EVENT_EXIT)" is no longer reached
until after the coredump completes.

Link: https://lkml.kernel.org/r/874kaax26c.fsf@disp2133
Reviewed-by: Kees Cook <[email protected]>
Signed-off-by: "Eric W. Biederman" <[email protected]>
  • Loading branch information
ebiederm committed Oct 6, 2021
1 parent d67e03e commit 9230738
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 40 deletions.
8 changes: 4 additions & 4 deletions fs/coredump.c
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)

for_each_thread(start, t) {
task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
if (t != current && t->mm) {
if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
nr++;
Expand Down Expand Up @@ -404,8 +404,8 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
*
* do_exit:
* The caller holds mm->mmap_lock. This means that the task which
* uses this mm can't pass coredump_exit_mm(), so it can't exit or
* clear its ->mm.
* uses this mm can't pass coredump_task_exit(), so it can't exit
* or clear its ->mm.
*
* de_thread:
* It does list_replace_rcu(&leader->tasks, &current->tasks),
Expand Down Expand Up @@ -500,7 +500,7 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
next = curr->next;
task = curr->task;
/*
* see coredump_exit_mm(), curr->task must not see
* see coredump_task_exit(), curr->task must not see
* ->task == NULL before we read ->next.
*/
smp_mb();
Expand Down
1 change: 1 addition & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -1664,6 +1664,7 @@ extern struct pid *cad_pid;
#define PF_VCPU 0x00000001 /* I'm a virtual CPU */
#define PF_IDLE 0x00000002 /* I am an IDLE thread */
#define PF_EXITING 0x00000004 /* Getting shut down */
#define PF_POSTCOREDUMP 0x00000008 /* Coredumps should ignore this task */
#define PF_IO_WORKER 0x00000010 /* Task is an IO worker */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
Expand Down
19 changes: 12 additions & 7 deletions kernel/exit.c
Original file line number Diff line number Diff line change
Expand Up @@ -339,23 +339,29 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
}
}

static void coredump_exit_mm(struct mm_struct *mm)
static void coredump_task_exit(struct task_struct *tsk)
{
struct core_state *core_state;
struct mm_struct *mm;

mm = tsk->mm;
if (!mm)
return;

/*
* Serialize with any possible pending coredump.
* We must hold mmap_lock around checking core_state
* and clearing tsk->mm. The core-inducing thread
* and setting PF_POSTCOREDUMP. The core-inducing thread
* will increment ->nr_threads for each thread in the
* group with ->mm != NULL.
* group without PF_POSTCOREDUMP set.
*/
mmap_read_lock(mm);
tsk->flags |= PF_POSTCOREDUMP;
core_state = mm->core_state;
mmap_read_unlock(mm);
if (core_state) {
struct core_thread self;

mmap_read_unlock(mm);

self.task = current;
if (self.task->flags & PF_SIGNALED)
self.next = xchg(&core_state->dumper.next, &self);
Expand All @@ -375,7 +381,6 @@ static void coredump_exit_mm(struct mm_struct *mm)
freezable_schedule();
}
__set_current_state(TASK_RUNNING);
mmap_read_lock(mm);
}
}

Expand Down Expand Up @@ -480,7 +485,6 @@ static void exit_mm(void)
return;
sync_mm_rss(mm);
mmap_read_lock(mm);
coredump_exit_mm(mm);
mmgrab(mm);
BUG_ON(mm != current->active_mm);
/* more a memory barrier than a real lock */
Expand Down Expand Up @@ -768,6 +772,7 @@ void __noreturn do_exit(long code)
profile_task_exit(tsk);
kcov_task_exit(tsk);

coredump_task_exit(tsk);
ptrace_event(PTRACE_EVENT_EXIT, code);

validate_creds_for_do_exit(tsk);
Expand Down
3 changes: 1 addition & 2 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -1392,8 +1392,7 @@ static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
* purposes.
*/
if (tsk->clear_child_tid) {
if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
atomic_read(&mm->mm_users) > 1) {
if (atomic_read(&mm->mm_users) > 1) {
/*
* We don't check the error code - if userspace has
* not set up a proper pointer then tough luck.
Expand Down
27 changes: 1 addition & 26 deletions kernel/signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -2158,31 +2158,6 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
spin_unlock_irqrestore(&sighand->siglock, flags);
}

static inline bool may_ptrace_stop(void)
{
if (!likely(current->ptrace))
return false;
/*
* Are we in the middle of do_coredump?
* If so and our tracer is also part of the coredump stopping
* is a deadlock situation, and pointless because our tracer
* is dead so don't allow us to stop.
* If SIGKILL was already sent before the caller unlocked
* ->siglock we must see ->core_state != NULL. Otherwise it
* is safe to enter schedule().
*
* This is almost outdated, a task with the pending SIGKILL can't
* block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
* after SIGKILL was already dequeued.
*/
if (unlikely(current->mm->core_state) &&
unlikely(current->mm == current->parent->mm))
return false;

return true;
}


/*
* This must be called with current->sighand->siglock held.
*
Expand Down Expand Up @@ -2263,7 +2238,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t

spin_unlock_irq(&current->sighand->siglock);
read_lock(&tasklist_lock);
if (may_ptrace_stop()) {
if (likely(current->ptrace)) {
/*
* Notify parents of the stop.
*
Expand Down
2 changes: 1 addition & 1 deletion mm/oom_kill.c
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,7 @@ static inline bool __task_will_free_mem(struct task_struct *task)

/*
* A coredumping process may sleep for an extended period in
* coredump_exit_mm(), so the oom killer cannot assume that
* coredump_task_exit(), so the oom killer cannot assume that
* the process will promptly exit and release memory.
*/
if (sig->flags & SIGNAL_GROUP_COREDUMP)
Expand Down

0 comments on commit 9230738

Please sign in to comment.