Skip to content

Commit

Permalink
context_tracking: New context tracking subsystem
Browse files Browse the repository at this point in the history
Create a new subsystem that probes on kernel boundaries
to keep track of the transitions between level contexts
with two basic initial contexts: user or kernel.

This is an abstraction of some RCU code that uses such tracking
to implement its userspace extended quiescent state.

We need to pull this up from RCU into this new level of indirection
because this tracking is also going to be used to implement an "on
demand" generic virtual cputime accounting, a necessary step toward
shutting down the tick while still accounting the cputime.

Signed-off-by: Frederic Weisbecker <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Li Zhong <[email protected]>
Cc: Gilad Ben-Yossef <[email protected]>
Reviewed-by: Steven Rostedt <[email protected]>
[ paulmck: fix whitespace error and email address. ]
Signed-off-by: Paul E. McKenney <[email protected]>
  • Loading branch information
fweisbec authored and paulmck committed Nov 30, 2012
1 parent 4e79752 commit 91d1aa4
Show file tree
Hide file tree
Showing 15 changed files with 150 additions and 108 deletions.
15 changes: 8 additions & 7 deletions arch/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -300,15 +300,16 @@ config SECCOMP_FILTER

See Documentation/prctl/seccomp_filter.txt for details.

config HAVE_RCU_USER_QS
config HAVE_CONTEXT_TRACKING
bool
help
Provide kernel entry/exit hooks necessary for userspace
RCU extended quiescent state. Syscalls need to be wrapped inside
rcu_user_exit()-rcu_user_enter() through the slow path using
TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs
are already protected inside rcu_irq_enter/rcu_irq_exit() but
preemption or signal handling on irq exit still need to be protected.
Provide kernel/user boundary probes necessary for subsystems
that need them, such as userspace RCU extended quiescent state.
Syscalls need to be wrapped inside user_exit()-user_enter() through
the slow path using the TIF_NOHZ flag. Exception handlers must be
wrapped as well. Irqs are already protected inside
rcu_irq_enter/rcu_irq_exit() but preemption or signal handling on
irq exit still needs to be protected.

config HAVE_VIRT_CPU_ACCOUNTING
bool
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ config X86
select KTIME_SCALAR if X86_32
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select HAVE_RCU_USER_QS if X86_64
select HAVE_CONTEXT_TRACKING if X86_64
select HAVE_IRQ_TIME_ACCOUNTING
select GENERIC_KERNEL_THREAD
select GENERIC_KERNEL_EXECVE
Expand Down
Original file line number Diff line number Diff line change
@@ -1,27 +1,26 @@
#ifndef _ASM_X86_RCU_H
#define _ASM_X86_RCU_H
#ifndef _ASM_X86_CONTEXT_TRACKING_H
#define _ASM_X86_CONTEXT_TRACKING_H

#ifndef __ASSEMBLY__

#include <linux/rcupdate.h>
#include <linux/context_tracking.h>
#include <asm/ptrace.h>

static inline void exception_enter(struct pt_regs *regs)
{
rcu_user_exit();
user_exit();
}

static inline void exception_exit(struct pt_regs *regs)
{
#ifdef CONFIG_RCU_USER_QS
#ifdef CONFIG_CONTEXT_TRACKING
if (user_mode(regs))
rcu_user_enter();
user_enter();
#endif
}

#else /* __ASSEMBLY__ */

#ifdef CONFIG_RCU_USER_QS
#ifdef CONFIG_CONTEXT_TRACKING
# define SCHEDULE_USER call schedule_user
#else
# define SCHEDULE_USER call schedule
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/entry_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/asm.h>
#include <asm/rcu.h>
#include <asm/context_tracking.h>
#include <asm/smap.h>
#include <linux/err.h>

Expand Down
8 changes: 4 additions & 4 deletions arch/x86/kernel/ptrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <linux/signal.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/rcupdate.h>
#include <linux/context_tracking.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
Expand Down Expand Up @@ -1461,7 +1461,7 @@ long syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;

rcu_user_exit();
user_exit();

/*
* If we stepped into a sysenter/syscall insn, it trapped in
Expand Down Expand Up @@ -1516,7 +1516,7 @@ void syscall_trace_leave(struct pt_regs *regs)
* or do_notify_resume(), in which case we can be in RCU
* user mode.
*/
rcu_user_exit();
user_exit();

audit_syscall_exit(regs);

Expand All @@ -1534,5 +1534,5 @@ void syscall_trace_leave(struct pt_regs *regs)
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);

rcu_user_enter();
user_enter();
}
5 changes: 3 additions & 2 deletions arch/x86/kernel/signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <linux/uaccess.h>
#include <linux/user-return-notifier.h>
#include <linux/uprobes.h>
#include <linux/context_tracking.h>

#include <asm/processor.h>
#include <asm/ucontext.h>
Expand Down Expand Up @@ -816,7 +817,7 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
rcu_user_exit();
user_exit();

#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
Expand All @@ -840,7 +841,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();

rcu_user_enter();
user_enter();
}

void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mce.h>
#include <asm/rcu.h>
#include <asm/context_tracking.h>

#include <asm/mach_traps.h>

Expand Down
2 changes: 1 addition & 1 deletion arch/x86/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include <asm/pgalloc.h> /* pgd_*(), ... */
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
#include <asm/fixmap.h> /* VSYSCALL_START */
#include <asm/rcu.h> /* exception_enter(), ... */
#include <asm/context_tracking.h> /* exception_enter(), ... */

/*
* Page fault error code bits:
Expand Down
18 changes: 18 additions & 0 deletions include/linux/context_tracking.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef _LINUX_CONTEXT_TRACKING_H
#define _LINUX_CONTEXT_TRACKING_H

/*
 * Context tracking interface.
 *
 * With CONFIG_CONTEXT_TRACKING set, user_enter(), user_exit() and
 * context_tracking_task_switch() are real out-of-line functions.
 * Otherwise they are empty inline stubs, so callers never need their
 * own #ifdef around them.
 */

/*
 * Forward declaration: <linux/sched.h> is only pulled in when
 * CONFIG_CONTEXT_TRACKING is set, but the stub below still names
 * struct task_struct in its parameter list. Without a declaration at
 * file scope the type would be local to that prototype, which warns
 * and makes the caller's pointers incompatible with the parameters.
 */
struct task_struct;

#ifdef CONFIG_CONTEXT_TRACKING
#include <linux/sched.h>

extern void user_enter(void);
extern void user_exit(void);
extern void context_tracking_task_switch(struct task_struct *prev,
					 struct task_struct *next);
#else
static inline void user_enter(void) { }
static inline void user_exit(void) { }
static inline void context_tracking_task_switch(struct task_struct *prev,
						struct task_struct *next) { }
#endif /* !CONFIG_CONTEXT_TRACKING */

#endif /* _LINUX_CONTEXT_TRACKING_H */
2 changes: 0 additions & 2 deletions include/linux/rcupdate.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,6 @@ extern void rcu_user_enter(void);
extern void rcu_user_exit(void);
extern void rcu_user_enter_after_irq(void);
extern void rcu_user_exit_after_irq(void);
extern void rcu_user_hooks_switch(struct task_struct *prev,
struct task_struct *next);
#else
static inline void rcu_user_enter(void) { }
static inline void rcu_user_exit(void) { }
Expand Down
28 changes: 14 additions & 14 deletions init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -486,9 +486,13 @@ config PREEMPT_RCU
This option enables preemptible-RCU code that is common between
the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.

config CONTEXT_TRACKING
bool

config RCU_USER_QS
bool "Consider userspace as in RCU extended quiescent state"
depends on HAVE_RCU_USER_QS && SMP
depends on HAVE_CONTEXT_TRACKING && SMP
select CONTEXT_TRACKING
help
This option sets hooks on kernel / userspace boundaries and
puts RCU in extended quiescent state when the CPU runs in
Expand All @@ -497,24 +501,20 @@ config RCU_USER_QS
try to keep the timer tick on for RCU.

Unless you want to hack and help the development of the full
tickless feature, you shouldn't enable this option. It also
dynticks mode, you shouldn't enable this option. It also
adds unnecessary overhead.

If unsure say N

config RCU_USER_QS_FORCE
bool "Force userspace extended QS by default"
depends on RCU_USER_QS
config CONTEXT_TRACKING_FORCE
bool "Force context tracking"
depends on CONTEXT_TRACKING
help
Set the hooks in user/kernel boundaries by default in order to
test this feature that treats userspace as an extended quiescent
state until we have a real user like a full adaptive nohz option.

Unless you want to hack and help the development of the full
tickless feature, you shouldn't enable this option. It adds
unnecessary overhead.

If unsure say N
Probe on user/kernel boundaries by default in order to
test the features that rely on it such as userspace RCU extended
quiescent states.
This test is there for debugging until we have a real user like the
full dynticks mode.

config RCU_FANOUT
int "Tree-based hierarchical RCU fanout value"
Expand Down
1 change: 1 addition & 0 deletions kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o

$(obj)/configs.o: $(obj)/config_data.h

Expand Down
83 changes: 83 additions & 0 deletions kernel/context_tracking.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#include <linux/context_tracking.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>

/*
 * Context tracking state. A per-CPU instance of this structure is
 * defined in this file.
 */
struct context_tracking {
/*
 * When active is false, hooks are not set to
 * minimize overhead: TIF flags are cleared
 * and calls to user_enter/exit are ignored. This
 * may be further optimized using static keys.
 */
bool active;
/*
 * Context last recorded by user_enter()/user_exit(). IN_KERNEL is
 * deliberately 0 so that a zero-initialized instance starts out in
 * kernel context.
 */
enum {
IN_KERNEL = 0,
IN_USER,
} state;
};

/*
 * Per-CPU tracking state. The only initializer is .active, and only
 * when CONFIG_CONTEXT_TRACKING_FORCE is set; every other member is
 * left zero, so state begins as IN_KERNEL. Nothing else in this file
 * writes .active.
 */
static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
.active = true,
#endif
};

/*
 * user_enter - record the switch of this CPU to user context.
 *
 * If tracking is active on this CPU and the recorded state is not
 * already IN_USER, mark the CPU as IN_USER and call rcu_user_enter().
 * Does nothing when called from interrupt context.
 */
void user_enter(void)
{
	unsigned long flags;

	/*
	 * An exception can occur while an irq is being handled, which
	 * produces this nesting:
	 *   rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
	 * That would corrupt the dyntick_nesting count, and the rcu_irq_*()
	 * helpers already protect RCU uses inside the exception, so bail
	 * out right away when running in interrupt context.
	 */
	if (in_interrupt())
		return;

	/* Warn (once) if the current task has no mm. */
	WARN_ON_ONCE(!current->mm);

	/* Keep the per-CPU check-and-update below free of local irqs. */
	local_irq_save(flags);

	if (!__this_cpu_read(context_tracking.active))
		goto out;

	if (__this_cpu_read(context_tracking.state) == IN_USER)
		goto out;

	__this_cpu_write(context_tracking.state, IN_USER);
	rcu_user_enter();
out:
	local_irq_restore(flags);
}

/*
 * user_exit - record the switch of this CPU back to kernel context.
 *
 * If the recorded state is IN_USER, mark the CPU as IN_KERNEL and call
 * rcu_user_exit(). Does nothing when called from interrupt context or
 * when the CPU is not recorded as being in user context.
 */
void user_exit(void)
{
	unsigned long flags;

	/*
	 * An exception can occur while an irq is being handled, which
	 * produces this nesting:
	 *   rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
	 * That would corrupt the dyntick_nesting count, and the rcu_irq_*()
	 * helpers already protect RCU uses inside the exception, so bail
	 * out right away when running in interrupt context.
	 */
	if (in_interrupt())
		return;

	/* Keep the per-CPU check-and-update below free of local irqs. */
	local_irq_save(flags);

	if (__this_cpu_read(context_tracking.state) != IN_USER)
		goto out;

	__this_cpu_write(context_tracking.state, IN_KERNEL);
	rcu_user_exit();
out:
	local_irq_restore(flags);
}

/*
 * context_tracking_task_switch - move the TIF_NOHZ flag across a switch.
 * @prev: task whose TIF_NOHZ flag is cleared
 * @next: task whose TIF_NOHZ flag is set
 *
 * Only acts when tracking is active on this CPU; otherwise neither
 * task's flags are touched.
 */
void context_tracking_task_switch(struct task_struct *prev,
				  struct task_struct *next)
{
	if (!__this_cpu_read(context_tracking.active))
		return;

	clear_tsk_thread_flag(prev, TIF_NOHZ);
	set_tsk_thread_flag(next, TIF_NOHZ);
}
Loading

0 comments on commit 91d1aa4

Please sign in to comment.